Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -432,11 +432,11 @@ def eval_func_for_nc(model_tuned):
acc = result[key]
break
return acc
from neural_compressor.experimental import Quantization, common
quantizer = Quantization("./conf.yaml")
quantizer.model = common.Model(model)
quantizer.eval_func = eval_func_for_nc
q_model = quantizer.fit()
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
tuning_criterion = TuningCriterion(max_trials=600)
conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", tuning_criterion=tuning_criterion)
q_model = fit(model, conf=conf, eval_func=eval_func_for_nc)
from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir)
exit(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,13 +498,11 @@ def eval_func(model):

# optimize and quantize with Neural Compressor
if model_args.tune:
from neural_compressor.experimental import Quantization, common
calib_dataloader = eval_dataloader
quantizer = Quantization('conf.yaml')
quantizer.eval_func = eval_func
quantizer.calib_dataloader = calib_dataloader
quantizer.model = common.Model(model)
model = quantizer.fit()
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
tuning_criterion = TuningCriterion(max_trials=600)
conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion)
model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)
from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
save_for_huggingface_upstream(model, tokenizer, training_args.output_dir)
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,6 @@ def compute_metrics(p: EvalPrediction):
eval_dataloader = trainer.get_eval_dataloader()
batch_size = eval_dataloader.batch_size

# Training hook for quantization-aware training: it is assigned to the
# quantizer's `q_func` and trains the given model through the enclosing `trainer`.
def train_func(model):
# Point both of the trainer's model references at the model being trained.
trainer.model_wrapped = model
trainer.model = model
trainer.train()
# Hand back whatever model the trainer holds once training has finished.
return trainer.model

def eval_func(model):
trainer.model = model
result = trainer.evaluate(eval_dataset=eval_dataset)
Expand All @@ -526,12 +520,17 @@ def benchmark(model):

# optimize and quantize with Neural Compressor
if model_args.tune:
from neural_compressor.experimental import Quantization, common
quantizer = Quantization('conf_qat.yaml')
quantizer.eval_func = eval_func
quantizer.q_func = train_func
quantizer.model = common.Model(model)
model = quantizer.fit()
from neural_compressor.training import prepare_compression
from neural_compressor.config import QuantizationAwareTrainingConfig
conf = QuantizationAwareTrainingConfig(backend="pytorch_fx")
compression_manager = prepare_compression(model, conf)
compression_manager.callbacks.on_train_begin()
model = compression_manager.model
trainer.model_wrapped = model
trainer.model = model
trainer.train()
compression_manager.callbacks.on_train_end()

from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
save_for_huggingface_upstream(model, tokenizer, training_args.output_dir)
return
Expand Down
7 changes: 4 additions & 3 deletions neural_compressor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from .utils.utility import set_backend
from .utils import options
from .conf.config import conf
from .conf.pythonic_config import config, DistillationConfig, Options, PostTrainingConfig, \
PruningConfig, QuantizationAwareTrainingConfig
from .conf.pythonic_config import config
from .config import DistillationConfig, PostTrainingQuantConfig, \
PruningConfig, QuantizationAwareTrainingConfig

set_backend('NA')
set_backend('NA')
20 changes: 9 additions & 11 deletions neural_compressor/adaptor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2661,12 +2661,11 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
self.tune_cfg = tune_cfg
self.tune_cfg["approach"] = self.approach
self.tune_cfg["framework"] = "pytorch_fx"
# pragma: no cover
if self.approach != 'post_training_dynamic_quant' and self.version.release >= Version("1.13.0").release:
assert dataloader is not None, "Please pass a dataloader to quantizer!"
example_inputs = get_example_inputs(model._model, dataloader)
else:
example_inputs = None

# PyTorch 1.13 and above need example_inputs for fx trace, but it is not really used,
# so set it to None.
example_inputs = None

if self.default_qconfig is not None:
default_qconfig = copy.deepcopy(self.default_qconfig)
default_qconfig['activation']['dtype'] = \
Expand Down Expand Up @@ -2861,11 +2860,10 @@ def _pre_hook_for_qat(self, dataloader=None):
from torch.quantization.quantize_fx import prepare_qat_fx
fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, 'quant_aware_training')
self.model._model.train()
if self.version.release >= Version("1.13.0").release: # pragma: no cover
assert dataloader is not None, "Please pass dataloader to qat hook!"
example_inputs = get_example_inputs(self.model._model, dataloader)
else:
example_inputs = None

# PyTorch 1.13 and above need example_inputs for fx trace, but it is not really used,
# so set it to None.
example_inputs = None

if self.sub_module_list is None:
if self.version.release >= Version("1.13.0").release: # pragma: no cover
Expand Down
9 changes: 8 additions & 1 deletion neural_compressor/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from .utils import logger
from .data import DATALOADERS
from .experimental import Benchmark as ExpBenchmark
from .conf.pythonic_config import Config
from .config import BenchmarkConfig

class Benchmark(object):
"""Benchmark class can be used to evaluate the model performance, with the objective
Expand Down Expand Up @@ -67,9 +69,11 @@ def postprocess(self, name, postprocess_cls, **kwargs):
self.exp_benchmarker.postprocess = nc_postprocess


def benchmark(
def fit(
model, config=None, b_dataloader=None, b_func=None
):
if isinstance(config, BenchmarkConfig):
config = Config(benchmark=config)
benchmarker = ExpBenchmark(config)
benchmarker.model = model
if b_func is not None:
Expand All @@ -78,3 +82,6 @@ def benchmark(
benchmarker.b_dataloader = b_dataloader
benchmarker()
return benchmarker.results


benchmark = fit
Loading