intel · chensuyue · Nov 25, 2022 · Nov 9, 2022 · Nov 14, 2022 · Nov 16, 2022
diff --git a/...lp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py b/...lp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py
@@ -432,11 +432,11 @@ def eval_func_for_nc(model_tuned):
                     acc = result[key]
                     break
             return acc
-        from neural_compressor.experimental import Quantization, common
-        quantizer = Quantization("./conf.yaml")
-        quantizer.model = common.Model(model)
-        quantizer.eval_func = eval_func_for_nc
-        q_model = quantizer.fit()
+        from neural_compressor.quantization import fit
+        from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
+        tuning_criterion = TuningCriterion(max_trials=600)
+        conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", tuning_criterion=tuning_criterion)
+        q_model = fit(model, conf=conf, eval_func=eval_func_for_nc)
         from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
         save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir)
         exit(0)

diff --git a/...pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py b/...pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py
@@ -498,13 +498,11 @@ def eval_func(model):
 
     # optimize and quantize with Neural Compressor
     if model_args.tune:
-        from neural_compressor.experimental import Quantization, common
-        calib_dataloader = eval_dataloader
-        quantizer = Quantization('conf.yaml')
-        quantizer.eval_func = eval_func
-        quantizer.calib_dataloader = calib_dataloader
-        quantizer.model = common.Model(model)
-        model = quantizer.fit()
+        from neural_compressor.quantization import fit
+        from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
+        tuning_criterion = TuningCriterion(max_trials=600)
+        conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion)
+        model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)
         from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
         save_for_huggingface_upstream(model, tokenizer, training_args.output_dir)
         return

diff --git a/...s/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py b/...s/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py
@@ -502,12 +502,6 @@ def compute_metrics(p: EvalPrediction):
     eval_dataloader = trainer.get_eval_dataloader()
     batch_size = eval_dataloader.batch_size
 
-    def train_func(model):
-        trainer.model_wrapped = model
-        trainer.model = model
-        trainer.train()
-        return trainer.model
-
     def eval_func(model):
         trainer.model = model
         result = trainer.evaluate(eval_dataset=eval_dataset)
@@ -526,12 +520,17 @@ def benchmark(model):
 
     # optimize and quantize with Neural Compressor
     if model_args.tune:
-        from neural_compressor.experimental import Quantization, common
-        quantizer = Quantization('conf_qat.yaml')
-        quantizer.eval_func = eval_func
-        quantizer.q_func = train_func
-        quantizer.model = common.Model(model)
-        model = quantizer.fit()
+        from neural_compressor.training import prepare_compression
+        from neural_compressor.config import QuantizationAwareTrainingConfig
+        conf = QuantizationAwareTrainingConfig(backend="pytorch_fx")
+        compression_manager = prepare_compression(model, conf)
+        compression_manager.callbacks.on_train_begin()
+        model = compression_manager.model
+        trainer.model_wrapped = model
+        trainer.model = model
+        trainer.train()
+        compression_manager.callbacks.on_train_end()
+
         from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
         save_for_huggingface_upstream(model, tokenizer, training_args.output_dir)
         return

diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py
@@ -24,7 +24,8 @@
 from .utils.utility import set_backend
 from .utils import options
 from .conf.config import conf
-from .conf.pythonic_config import config, DistillationConfig, Options, PostTrainingConfig, \
-                                  PruningConfig, QuantizationAwareTrainingConfig
+from .conf.pythonic_config import config
+from .config import DistillationConfig, PostTrainingQuantConfig, \
+                    PruningConfig, QuantizationAwareTrainingConfig
 
-set_backend('NA')
+set_backend('NA')
diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
@@ -2661,12 +2661,11 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
         self.tune_cfg = tune_cfg
         self.tune_cfg["approach"] = self.approach
         self.tune_cfg["framework"] = "pytorch_fx"
-        # pragma: no cover
-        if self.approach != 'post_training_dynamic_quant' and self.version.release >= Version("1.13.0").release:
-            assert dataloader is not None, "Please pass a dataloader to quantizer!"
-            example_inputs = get_example_inputs(model._model, dataloader)
-        else:
-            example_inputs = None
+
+        # PyTorch 1.13 and above require example_inputs for FX tracing, but it is not really used,
+        # so set it to None.
+        example_inputs = None
+
         if self.default_qconfig is not None:
             default_qconfig = copy.deepcopy(self.default_qconfig)
             default_qconfig['activation']['dtype'] = \
@@ -2861,11 +2860,10 @@ def _pre_hook_for_qat(self, dataloader=None):
         from torch.quantization.quantize_fx import prepare_qat_fx
         fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, 'quant_aware_training')
         self.model._model.train()
-        if self.version.release >= Version("1.13.0").release:  # pragma: no cover
-            assert dataloader is not None, "Please pass dataloader to qat hook!"
-            example_inputs = get_example_inputs(self.model._model, dataloader)
-        else:
-            example_inputs = None
+
+        # PyTorch 1.13 and above require example_inputs for FX tracing, but it is not really used,
+        # so set it to None.
+        example_inputs = None
 
         if self.sub_module_list is None:
             if self.version.release >= Version("1.13.0").release:  # pragma: no cover

diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py
@@ -18,6 +18,8 @@
 from .utils import logger
 from .data import DATALOADERS
 from .experimental import Benchmark as ExpBenchmark
+from .conf.pythonic_config import Config
+from .config import BenchmarkConfig
 
 class Benchmark(object):
     """Benchmark class can be used to evaluate the model performance, with the objective
@@ -67,9 +69,11 @@ def postprocess(self, name, postprocess_cls, **kwargs):
         self.exp_benchmarker.postprocess = nc_postprocess
 
 
-def benchmark(
+def fit(
     model, config=None, b_dataloader=None, b_func=None
 ):
+    if isinstance(config, BenchmarkConfig):
+        config = Config(benchmark=config)
     benchmarker = ExpBenchmark(config)
     benchmarker.model = model
     if b_func is not None:
@@ -78,3 +82,6 @@ def benchmark(
         benchmarker.b_dataloader = b_dataloader
     benchmarker()
     return benchmarker.results
+
+
+benchmark = fit