diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py index 2da5db448cb..13812b30b4e 100755 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py @@ -432,11 +432,11 @@ def eval_func_for_nc(model_tuned): acc = result[key] break return acc - from neural_compressor.experimental import Quantization, common - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(model) - quantizer.eval_func = eval_func_for_nc - q_model = quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=600) + conf = PostTrainingQuantConfig(approach="dynamic", backend="pytorch", tuning_criterion=tuning_criterion) + q_model = fit(model, conf=conf, eval_func=eval_func_for_nc) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) exit(0) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py index 8ea43ea4a41..717ae91d886 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py @@ -498,13 +498,11 @@ def eval_func(model): # optimize and quantize with Neural Compressor if model_args.tune: - from neural_compressor.experimental import Quantization, common - calib_dataloader = eval_dataloader - quantizer = Quantization('conf.yaml') - quantizer.eval_func = eval_func - quantizer.calib_dataloader = calib_dataloader - quantizer.model = common.Model(model) - model = quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=600) + conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) + model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) return diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py index 79c785850c0..f5bc771e712 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py @@ -502,12 +502,6 @@ def compute_metrics(p: EvalPrediction): eval_dataloader = trainer.get_eval_dataloader() batch_size = eval_dataloader.batch_size - def train_func(model): - trainer.model_wrapped = model - trainer.model = model - trainer.train() - return trainer.model - def eval_func(model): trainer.model = model result = trainer.evaluate(eval_dataset=eval_dataset) @@ 
-526,12 +520,17 @@ def benchmark(model): # optimize and quantize with Neural Compressor if model_args.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('conf_qat.yaml') - quantizer.eval_func = eval_func - quantizer.q_func = train_func - quantizer.model = common.Model(model) - model = quantizer.fit() + from neural_compressor.training import prepare_compression + from neural_compressor.config import QuantizationAwareTrainingConfig + conf = QuantizationAwareTrainingConfig(backend="pytorch_fx") + compression_manager = prepare_compression(model, conf) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model + trainer.model_wrapped = model + trainer.model = model + trainer.train() + compression_manager.callbacks.on_train_end() + from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) return diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 6bdf202786c..bc46fdbd916 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -24,7 +24,8 @@ from .utils.utility import set_backend from .utils import options from .conf.config import conf -from .conf.pythonic_config import config, DistillationConfig, Options, PostTrainingConfig, \ - PruningConfig, QuantizationAwareTrainingConfig +from .conf.pythonic_config import config +from .config import DistillationConfig, PostTrainingQuantConfig, \ + PruningConfig, QuantizationAwareTrainingConfig -set_backend('NA') +set_backend('NA') \ No newline at end of file diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 833d011f858..2be7529366f 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -2661,12 +2661,11 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): self.tune_cfg = tune_cfg self.tune_cfg["approach"] = self.approach self.tune_cfg["framework"] = "pytorch_fx" - # pragma: no cover - if self.approach != 'post_training_dynamic_quant' and self.version.release >= Version("1.13.0").release: - assert dataloader is not None, "Please pass a dataloader to quantizer!" - example_inputs = get_example_inputs(model._model, dataloader) - else: - example_inputs = None + + # PyTorch 1.13 and above needs example_inputs for fx trace, but it is not really used, + # so set it to None. + example_inputs = None + if self.default_qconfig is not None: default_qconfig = copy.deepcopy(self.default_qconfig) default_qconfig['activation']['dtype'] = \ @@ -2861,11 +2860,10 @@ def _pre_hook_for_qat(self, dataloader=None): from torch.quantization.quantize_fx import prepare_qat_fx fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, 'quant_aware_training') self.model._model.train() - if self.version.release >= Version("1.13.0").release: # pragma: no cover - assert dataloader is not None, "Please pass dataloader to qat hook!" - example_inputs = get_example_inputs(self.model._model, dataloader) - else: - example_inputs = None + + # PyTorch 1.13 and above needs example_inputs for fx trace, but it is not really used, + # so set it to None. 
+ example_inputs = None if self.sub_module_list is None: if self.version.release >= Version("1.13.0").release: # pragma: no cover diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 30e3bf8aa28..87d425a846b 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -18,6 +18,8 @@ from .utils import logger from .data import DATALOADERS from .experimental import Benchmark as ExpBenchmark +from .conf.pythonic_config import Config +from .config import BenchmarkConfig class Benchmark(object): """Benchmark class can be used to evaluate the model performance, with the objective @@ -67,9 +69,11 @@ def postprocess(self, name, postprocess_cls, **kwargs): self.exp_benchmarker.postprocess = nc_postprocess -def benchmark( +def fit( model, config=None, b_dataloader=None, b_func=None ): + if isinstance(config, BenchmarkConfig): + config = Config(benchmark=config) benchmarker = ExpBenchmark(config) benchmarker.model = model if b_func is not None: @@ -78,3 +82,6 @@ def benchmark( benchmarker.b_dataloader = b_dataloader benchmarker() return benchmarker.results + + +benchmark = fit diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 89d0b773d40..49eaa1738e7 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -16,377 +16,12 @@ # limitations under the License. import logging -import datetime -from typing import List -from schema import Schema, And, Use, Optional, Or from .dotdict import DotDict -from .config import Pruner +from ..config import _BaseQuantizationConfig, accuracy_criterion, BenchmarkConfig, \ + check_value, DistillationConfig, options, PruningConfig logger = logging.getLogger("neural_compressor") -default_workspace = './nc_workspace/{}/'.format( - datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) - -ops_schema = Schema({ - Optional('weight', default=None): { - Optional('granularity'): And( - list, - lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), - Optional('scheme'): And( - list, - lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), - Optional('dtype'): And( - list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), - Optional('algorithm'): And( - list, - lambda s: all(i in ['minmax'] for i in s))}, - Optional('activation', default=None): { - Optional('granularity'): And( - list, - lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), - Optional('scheme'): And( - list, - lambda s: all(i in ['asym', 'sym'] for i in s)), - Optional('dtype'): And( - list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16', 'None'] for i in s)), - Optional('algorithm'): And( - list, - lambda s: all(i in ['minmax', 'kl', 'placeholder'] for i in s))}}) - - -def check_value(name, src, supported_type, supported_value=[]): - if isinstance(src, list) and any([not isinstance(i, supported_type) for i in src]): - logger.warning("Type of {} items should be {} but not {}, " \ - "use its default value.".format(name, str(supported_type), [type(i) for i in src])) - return False - elif not isinstance(src, list) and not isinstance(src, supported_type): - logger.warning("Type of {} should be {} but not {}, " \ - "use its default value.".format(name, str(supported_type), type(src))) - return False - - if len(supported_value) > 0: - if isinstance(src, str) and src not in supported_value: - logger.warning("{} is not in supported {}: {}. 
Skip setting it and" \ - " use default value.".format(src, name, str(supported_value))) - return False - elif isinstance(src, list) and all([isinstance(i, str) for i in src]) and \ - any([i not in supported_value for i in src]): - logger.warning("{} is not in supported {}: {}. Skip setting it and" \ - " use default value.".format(src, name, str(supported_value))) - return False - - return True - -class BenchmarkConfig: - def __init__(self, warmup=5, iteration=-1, cores_per_instance=None, num_of_instance=None, - inter_num_of_threads=None, intra_num_of_threads=None): - self._warmup = warmup - self._iteration = iteration - self._cores_per_instance = cores_per_instance - self._num_of_instance = num_of_instance - self._inter_num_of_threads = inter_num_of_threads - self._intra_num_of_threads = intra_num_of_threads - - @property - def warmup(self): - return self._warmup - - @warmup.setter - def warmup(self, warmup): - if check_value('warmup', warmup, int): - self._warmup = warmup - - @property - def iteration(self): - return self._iteration - - @iteration.setter - def iteration(self, iteration): - if check_value('iteration', iteration, int): - self._iteration = iteration - - @property - def cores_per_instance(self): - return self._cores_per_instance - - @cores_per_instance.setter - def cores_per_instance(self, cores_per_instance): - if check_value('cores_per_instance', cores_per_instance, int): - self._cores_per_instance = cores_per_instance - - @property - def num_of_instance(self): - return self._num_of_instance - - @num_of_instance.setter - def num_of_instance(self, num_of_instance): - if check_value('num_of_instance', num_of_instance, int): - self._num_of_instance = num_of_instance - - @property - def inter_num_of_threads(self): - return self._inter_num_of_threads - - @inter_num_of_threads.setter - def inter_num_of_threads(self, inter_num_of_threads): - if check_value('inter_num_of_threads', inter_num_of_threads, int): - self._inter_num_of_threads = inter_num_of_threads - - @property - def intra_num_of_threads(self): - return self._intra_num_of_threads - - @intra_num_of_threads.setter - def intra_num_of_threads(self, intra_num_of_threads): - if check_value('intra_num_of_threads', intra_num_of_threads, int): - self._intra_num_of_threads = intra_num_of_threads - -class AccuracyLoss: - def __init__(self, loss=0.01): - self._loss = loss - - @property - def relative(self): - return self._loss - - @relative.setter - def relative(self, relative): - if check_value('relative tolerable loss', relative, float): - self._loss = relative - - @property - def absolute(self): - return self._loss - - @absolute.setter - def absolute(self, absolute): - if check_value('absolute tolerable loss', absolute, float): - self._loss = absolute - -tolerable_loss = AccuracyLoss() - -class AccuracyCriterion: - def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=tolerable_loss): - self._higher_is_better = higher_is_better - self._criterion = criterion - self._tolerable_loss = tolerable_loss - - @property - def higher_is_better(self): - return self._higher_is_better - - @higher_is_better.setter - def higher_is_better(self, higher_is_better): - if check_value('higher_is_better', higher_is_better, bool): - self._higher_is_better = higher_is_better - - @property - def relative(self): - if self._criterion != 'relative': - return None - return self._tolerable_loss.relative - - @relative.setter - def relative(self, relative): - self._criterion = 'relative' - self._tolerable_loss.relative = relative - - 
@property - def absolute(self): - if self._criterion != 'absolute': - return None - return self._tolerable_loss.absolute - - @absolute.setter - def absolute(self, absolute): - self._criterion = 'absolute' - self._tolerable_loss.absolute = absolute - - def __str__(self): - return self._criterion - -accuracy_criterion = AccuracyCriterion() - -class _BaseQuantizationConfig: - def __init__(self, - inputs=[], - outputs=[], - backend='NA', - device='cpu', - calibration_sampling_size=[100], - op_type_list=None, - op_name_list=None, - strategy='basic', - objective='performance', - timeout=0, - max_trials=100, - performance_only=False, - reduce_range=None, - use_bf16=False, - accuracy_criterion=accuracy_criterion): - self._inputs = inputs - self._outputs = outputs - self._backend = backend - self._device = device - self._op_type_list = op_type_list - self._op_name_list = op_name_list - self._strategy = strategy - self._objective = objective - self._timeout = timeout - self._max_trials = max_trials - self._performance_only = performance_only - self._reduce_range = reduce_range - self._use_bf16 = use_bf16 - self._accuracy_criterion = accuracy_criterion - self._calibration_sampling_size = calibration_sampling_size - - @property - def accuracy_criterion(self): - return self._accuracy_criterion - - @property - def use_bf16(self): - return self._use_bf16 - - @use_bf16.setter - def use_bf16(self, use_bf16): - if check_value('use_bf16', use_bf16, bool): - self._use_bf16 = use_bf16 - - @property - def reduce_range(self): - return self._reduce_range - - @reduce_range.setter - def reduce_range(self, reduce_range): - if check_value('reduce_range', reduce_range, bool): - self._reduce_range = reduce_range - - @property - def performance_only(self): - return self._performance_only - - @performance_only.setter - def performance_only(self, performance_only): - if check_value('performance_only', performance_only, bool): - self._performance_only = performance_only - - @property - def max_trials(self): - return self._max_trials - - @max_trials.setter - def max_trials(self, max_trials): - if check_value('max_trials', max_trials, int): - self._max_trials = max_trials - - @property - def timeout(self): - return self._timeout - - @timeout.setter - def timeout(self, timeout): - if check_value('timeout', timeout, int): - self._timeout = timeout - - @property - def objective(self): - return self._objective - - @objective.setter - def objective(self, objective): - if check_value('objective', objective, str, - ['performance', 'accuracy', 'modelsize', 'footprint']): - self._objective = objective - - @property - def strategy(self): - return self._strategy - - @strategy.setter - def strategy(self, strategy): - if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): - self._strategy = strategy - - @property - def op_name_list(self): - return self._op_name_list - - @op_name_list.setter - def op_name_list(self, op_name_list): - if not isinstance(op_name_list, dict): - logger.warning("Type of op_name_list should be dict but not {}, " \ - "use its default value.".format(type(op_name_list))) - else: - for k, v in op_name_list.items(): - ops_schema.validate(v) - self._op_name_list = op_name_list - - @property - def op_type_list(self): - return self._op_type_list - - @op_type_list.setter - def op_type_list(self, op_type_list): - if not isinstance(op_type_list, dict): - logger.warning("Type of op_type_list should be dict but not {}, " \ - "use its default value.".format(type(op_type_list))) - 
else: - for k, v in op_type_list.items(): - ops_schema.validate(v) - self._op_type_list = op_type_list - - @property - def calibration_sampling_size(self): - return self._calibration_sampling_size - - @calibration_sampling_size.setter - def calibration_sampling_size(self, sampling_size): - if check_value('calibration_sampling_size', sampling_size, int): - self._calibration_sampling_size = sampling_size - - @property - def device(self): - return self._device - - @device.setter - def device(self, device): - if check_value('device', device, str, ['cpu', 'gpu']): - self._device = device - - @property - def backend(self): - return self._backend - - @backend.setter - def backend(self, backend): - if check_value('backend', backend, str, [ - 'tensorflow', 'tensorflow_itex', 'pytorch', 'pytorch_ipex', 'pytorch_fx', - 'onnxrt_qlinearops', 'onnxrt_integerops', 'onnxrt_qdq', 'onnxrt_qoperator', 'mxnet' - ]): - self._backend = backend - - @property - def outputs(self): - return self._outputs - - @outputs.setter - def outputs(self, outputs): - if check_value('outputs', outputs, str): - self._outputs = outputs - - @property - def inputs(self): - return self._inputs - - @inputs.setter - def inputs(self, inputs): - if check_value('inputs', inputs, str): - self._inputs = inputs - class QuantizationConfig(_BaseQuantizationConfig): def __init__(self, @@ -424,112 +59,6 @@ def approach(self, approach): self._approach = approach -class PostTrainingConfig(_BaseQuantizationConfig): - def __init__(self, - inputs=[], - outputs=[], - backend='NA', - device='cpu', - approach='post_training_auto_quant', - calibration_sampling_size=[100], - op_type_list=None, - op_name_list=None, - strategy='basic', - objective='performance', - timeout=0, - max_trials=100, - performance_only=False, - reduce_range=None, - use_bf16=False, - accuracy_criterion=accuracy_criterion): - super().__init__(inputs, outputs, backend, device, calibration_sampling_size, op_type_list, - op_name_list, strategy, objective, timeout, max_trials, performance_only, - reduce_range, use_bf16, accuracy_criterion) - self._approach = approach - - @property - def approach(self): - return self._approach - - @approach.setter - def approach(self, approach): - if check_value("approach", approach, str, [ - "post_training_static_quant", "post_training_dynamic_quant", - "post_training_auto_quant" - ]): - self._approach = approach - - -class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): - def __init__(self, - inputs=[], - outputs=[], - backend='NA', - device='cpu', - op_type_list=None, - op_name_list=None, - reduce_range=None, - use_bf16=False): - super().__init__(inputs=inputs, outputs=outputs, backend=backend, device=device, - op_type_list=op_type_list, op_name_list=op_name_list, - reduce_range=reduce_range, use_bf16=use_bf16) - self._approach = 'quant_aware_training' - - @property - def approach(self): - return self._approach - - @approach.setter - def approach(self, approach): - if check_value('approach', approach, str, - ['quant_aware_training']): - self._approach = approach - - -class Options: - def __init__(self, random_seed=1978, workspace=default_workspace, - resume_from=None, tensorboard=False): - self._random_seed = random_seed - self._workspace = workspace - self._resume_from = resume_from - self._tensorboard = tensorboard - - @property - def random_seed(self): - return self._random_seed - - @random_seed.setter - def random_seed(self, random_seed): - if check_value('random_seed', random_seed, int): - self._random_seed = random_seed - - @property - 
def workspace(self): - return self._workspace - - @workspace.setter - def workspace(self, workspace): - if check_value('workspace', workspace, str): - self._workspace = workspace - - @property - def resume_from(self): - return self._resume_from - - @resume_from.setter - def resume_from(self, resume_from): - if check_value('resume_from', resume_from, str): - self._resume_from = resume_from - - @property - def tensorboard(self): - return self._tensorboard - - @tensorboard.setter - def tensorboard(self, tensorboard): - if check_value('tensorboard', tensorboard, bool): - self._tensorboard = tensorboard - class WeightConf: def __init__(self, datatype=None, scheme=None, granularity=None, algorithm=None): self._datatype = datatype @@ -641,134 +170,6 @@ class PyTorch(MXNet): def __init__(self, precisions=None): super().__init__(precisions) -pruners = [Pruner()] - -class PruningConfig: - def __init__(self, pruners=pruners, initial_sparsity=0.0, target_sparsity=0.97, - max_sparsity_ratio_per_layer=0.98, prune_type="basic_magnitude", - start_epoch=0, end_epoch=4, start_step=0, end_step=0, update_frequency=1.0, - update_frequency_on_step=1, not_to_prune_names=[], prune_domain="global", - names=[], exclude_names=[], prune_layer_type=[], sparsity_decay_type="exp", - pattern="tile_pattern_1x1"): - self._weight_compression = DotDict({ - 'initial_sparsity': initial_sparsity, - 'target_sparsity': target_sparsity, - 'max_sparsity_ratio_per_layer': max_sparsity_ratio_per_layer, - 'prune_type': prune_type, - 'start_epoch': start_epoch, - 'end_epoch': end_epoch, - 'start_step': start_step, - 'end_step': end_step, - 'update_frequency': update_frequency, - 'update_frequency_on_step': update_frequency_on_step, - 'not_to_prune_names': not_to_prune_names, - 'prune_domain': prune_domain, - 'names': names, - 'exclude_names': exclude_names, - 'prune_layer_type': prune_layer_type, - 'sparsity_decay_type': sparsity_decay_type, - 'pattern': pattern, - 'pruners': pruners - }) - - @property - def weight_compression(self): - return self._weight_compression - - @weight_compression.setter - def weight_compression(self, weight_compression): - self._weight_compression = weight_compression - - -class KnowledgeDistillationLossConfig: - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], loss_weights=[0.5, 0.5]): - self.config = DotDict({ - 'KnowledgeDistillationLoss': { - 'temperature': temperature, - 'loss_types': loss_types, - 'loss_weights': loss_weights - } - }) - - -class IntermediateLayersKnowledgeDistillationLossConfig: - def __init__(self, layer_mappings=[], loss_types=[], loss_weights=[], add_origin_loss=False): - self.config = DotDict({ - 'IntermediateLayersKnowledgeDistillationLoss': { - 'layer_mappings': layer_mappings, - 'loss_types': loss_types, - 'loss_weights': loss_weights, - 'add_origin_loss': add_origin_loss - } - }) - - -class SelfKnowledgeDistillationLossConfig: - def __init__(self, - layer_mappings=[], - temperature=1.0, - loss_types=[], - loss_weights=[], - add_origin_loss=False): - self.config = DotDict({ - 'SelfKnowledgeDistillationLoss': { - 'layer_mappings': layer_mappings, - 'temperature': temperature, - 'loss_types': loss_types, - 'loss_weights': loss_weights, - 'add_origin_loss': add_origin_loss, - } - }) - - -criterion = KnowledgeDistillationLossConfig() - -class DistillationConfig: - """Config of distillation. - - Args: - - teacher_model (Callable): Teacher model for distillation. Defaults to None. 
- features (optional): Teacher features for distillation, features and teacher_model are alternative. - Defaults to None. - criterion (Callable, optional): Distillation loss configure. - optimizer (dictionary, optional): Optimizer configure. - """ - - def __init__(self, - teacher_model, - criterion=criterion, - optimizer={'SGD': { - 'learning_rate': 0.0001 - }}): - self._criterion = criterion.config - self._optimizer = optimizer - self._teacher_model = teacher_model - - @property - def criterion(self): - return self._criterion - - @criterion.setter - def criterion(self, criterion): - self._criterion = criterion - - @property - def optimizer(self): - return self._optimizer - - @optimizer.setter - def optimizer(self, optimizer): - self._optimizer = optimizer - - @property - def teacher_model(self): - return self._teacher_model - - @teacher_model.setter - def teacher_model(self, teacher_model): - self._teacher_model = teacher_model - class DyNASConfig: def __init__(self, supernet=None, metrics=None, population=50, num_evals=100000, @@ -816,41 +217,8 @@ def search(self, search): self._search = search -class MixedPrecisionConfig(PostTrainingConfig): - def __init__(self, - inputs=[], - outputs=[], - backend='NA', - device='cpu', - op_type_list=None, - op_name_list=None, - strategy='basic', - objective='performance', - timeout=0, - max_trials=100, - performance_only=False, - reduce_range=None, - accuracy_criterion=accuracy_criterion, - precisions=["bf16"]): - super().__init__(inputs, outputs, backend, device, op_type_list=op_type_list, - op_name_list=op_name_list, strategy=strategy, objective=objective, - timeout=timeout, max_trials=max_trials, performance_only=performance_only, - reduce_range=reduce_range, accuracy_criterion=accuracy_criterion, - use_bf16=True) - self._precisions = precisions if isinstance(precisions, List) else [precisions] - - @property - def precisions(self): - return self._precisions - - @precisions.setter - def precisions(self, precisions): - self._precisions = precisions - - quantization = QuantizationConfig() benchmark = BenchmarkConfig() -options = Options() pruning = PruningConfig() distillation = DistillationConfig(teacher_model=None) nas = NASConfig() diff --git a/neural_compressor/config.py b/neural_compressor/config.py new file mode 100644 index 00000000000..535eb307a28 --- /dev/null +++ b/neural_compressor/config.py @@ -0,0 +1,854 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import logging +from typing import List +from schema import Schema, And, Optional +from .conf.dotdict import DotDict +from .conf.config import Pruner + +logger = logging.getLogger("neural_compressor") +default_workspace = './nc_workspace/{}/'.format( + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) + +QUANTMAPPING = { + "auto": "post_training_auto_quant", + "dynamic": "post_training_dynamic_quant", + "static": "post_training_static_quant", + "qat": "quant_aware_training", +} + + +ops_schema = Schema({ + Optional('weight', default=None): { + Optional('granularity'): And( + list, + lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), + Optional('scheme'): And( + list, + lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), + Optional('dtype'): And( + list, + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + Optional('algorithm'): And( + list, + lambda s: all(i in ['minmax'] for i in s))}, + Optional('activation', default=None): { + Optional('granularity'): And( + list, + lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), + Optional('scheme'): And( + list, + lambda s: all(i in ['asym', 'sym'] for i in s)), + Optional('dtype'): And( + list, + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16', 'None'] for i in s)), + Optional('algorithm'): And( + list, + lambda s: all(i in ['minmax', 'kl', 'placeholder'] for i in s))}}) + + +def check_value(name, src, supported_type, supported_value=[]): + if isinstance(src, list) and any([not isinstance(i, supported_type) for i in src]): + logger.warning("Type of {} items should be {} but not {}, " \ + "use its default value.".format(name, str(supported_type), [type(i) for i in src])) + return False + elif not isinstance(src, list) and not isinstance(src, supported_type): + logger.warning("Type of {} should be {} but not {}, " \ + "use its default value.".format(name, str(supported_type), type(src))) + return False + + if len(supported_value) > 0: + if isinstance(src, str) and src not in supported_value: + logger.warning("{} is not in supported {}: {}. Skip setting it and" \ + " use default value.".format(src, name, str(supported_value))) + return False + elif isinstance(src, list) and all([isinstance(i, str) for i in src]) and \ + any([i not in supported_value for i in src]): + logger.warning("{} is not in supported {}: {}. 
Skip setting it and" \ + " use default value.".format(src, name, str(supported_value))) + return False + + return True + + +class Options: + def __init__(self, random_seed=1978, workspace=default_workspace, + resume_from=None, tensorboard=False): + self._random_seed = random_seed + self._workspace = workspace + self._resume_from = resume_from + self._tensorboard = tensorboard + + @property + def random_seed(self): + return self._random_seed + + @random_seed.setter + def random_seed(self, random_seed): + if check_value('random_seed', random_seed, int): + self._random_seed = random_seed + + @property + def workspace(self): + return self._workspace + + @workspace.setter + def workspace(self, workspace): + if check_value('workspace', workspace, str): + self._workspace = workspace + + @property + def resume_from(self): + return self._resume_from + + @resume_from.setter + def resume_from(self, resume_from): + if check_value('resume_from', resume_from, str): + self._resume_from = resume_from + + @property + def tensorboard(self): + return self._tensorboard + + @tensorboard.setter + def tensorboard(self, tensorboard): + if check_value('tensorboard', tensorboard, bool): + self._tensorboard = tensorboard + + +options = Options() + + +class BenchmarkConfig: + def __init__(self, warmup=5, iteration=-1, cores_per_instance=None, num_of_instance=None, + inter_num_of_threads=None, intra_num_of_threads=None): + self._warmup = warmup + self._iteration = iteration + self._cores_per_instance = cores_per_instance + self._num_of_instance = num_of_instance + self._inter_num_of_threads = inter_num_of_threads + self._intra_num_of_threads = intra_num_of_threads + + @property + def warmup(self): + return self._warmup + + @warmup.setter + def warmup(self, warmup): + if check_value('warmup', warmup, int): + self._warmup = warmup + + @property + def iteration(self): + return self._iteration + + @iteration.setter + def iteration(self, iteration): + if check_value('iteration', iteration, int): + self._iteration = iteration + + @property + def cores_per_instance(self): + return self._cores_per_instance + + @cores_per_instance.setter + def cores_per_instance(self, cores_per_instance): + if check_value('cores_per_instance', cores_per_instance, int): + self._cores_per_instance = cores_per_instance + + @property + def num_of_instance(self): + return self._num_of_instance + + @num_of_instance.setter + def num_of_instance(self, num_of_instance): + if check_value('num_of_instance', num_of_instance, int): + self._num_of_instance = num_of_instance + + @property + def inter_num_of_threads(self): + return self._inter_num_of_threads + + @inter_num_of_threads.setter + def inter_num_of_threads(self, inter_num_of_threads): + if check_value('inter_num_of_threads', inter_num_of_threads, int): + self._inter_num_of_threads = inter_num_of_threads + + @property + def intra_num_of_threads(self): + return self._intra_num_of_threads + + @intra_num_of_threads.setter + def intra_num_of_threads(self, intra_num_of_threads): + if check_value('intra_num_of_threads', intra_num_of_threads, int): + self._intra_num_of_threads = intra_num_of_threads + + +class AccuracyLoss: + def __init__(self, loss=0.01): + self._loss = loss + + @property + def relative(self): + return self._loss + + @relative.setter + def relative(self, relative): + if check_value('relative tolerable loss', relative, float): + self._loss = relative + + @property + def absolute(self): + return self._loss + + @absolute.setter + def absolute(self, absolute): + if check_value('absolute 
tolerable loss', absolute, float): + self._loss = absolute + + +tolerable_loss = AccuracyLoss() + + +class AccuracyCriterion: + def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=tolerable_loss): + self._higher_is_better = higher_is_better + self._criterion = criterion + self._tolerable_loss = tolerable_loss + + @property + def higher_is_better(self): + return self._higher_is_better + + @higher_is_better.setter + def higher_is_better(self, higher_is_better): + if check_value('higher_is_better', higher_is_better, bool): + self._higher_is_better = higher_is_better + + @property + def relative(self): + if self._criterion != 'relative': + return None + return self._tolerable_loss.relative + + @relative.setter + def relative(self, relative): + self._criterion = 'relative' + self._tolerable_loss.relative = relative + + @property + def absolute(self): + if self._criterion != 'absolute': + return None + return self._tolerable_loss.absolute + + @absolute.setter + def absolute(self, absolute): + self._criterion = 'absolute' + self._tolerable_loss.absolute = absolute + + def __str__(self): + return self._criterion + + +accuracy_criterion = AccuracyCriterion() + + +class _BaseQuantizationConfig: + def __init__(self, + inputs=[], + outputs=[], + backend="NA", + device="cpu", + calibration_sampling_size=[100], + op_type_list=None, + op_name_list=None, + strategy="basic", + objective="performance", + timeout=0, + max_trials=100, + performance_only=False, + reduce_range=None, + extra_precisions=[], + accuracy_criterion=accuracy_criterion): + self._inputs = inputs + self._outputs = outputs + self._backend = backend + self._device = device + self._op_type_list = op_type_list + self._op_name_list = op_name_list + self._strategy = strategy + self._objective = objective + self._timeout = timeout + self._max_trials = max_trials + self._performance_only = performance_only + self._reduce_range = reduce_range + self._extra_precisions = extra_precisions \ + if isinstance(extra_precisions, List) else [extra_precisions] + self.use_bf16 = "bf16" in self._extra_precisions + self._accuracy_criterion = accuracy_criterion + self._calibration_sampling_size = calibration_sampling_size + + @property + def accuracy_criterion(self): + return self._accuracy_criterion + + @property + def extra_precisions(self): + return self._extra_precisions + + @extra_precisions.setter + def extra_precisions(self, extra_precisions): + if check_value('extra_precisions', extra_precisions, List): + self._extra_precisions = extra_precisions + self._use_bf16 = "bf16" in extra_precisions + + @property + def reduce_range(self): + return self._reduce_range + + @reduce_range.setter + def reduce_range(self, reduce_range): + if check_value('reduce_range', reduce_range, bool): + self._reduce_range = reduce_range + + @property + def performance_only(self): + return self._performance_only + + @performance_only.setter + def performance_only(self, performance_only): + if check_value('performance_only', performance_only, bool): + self._performance_only = performance_only + + @property + def max_trials(self): + return self._max_trials + + @max_trials.setter + def max_trials(self, max_trials): + if check_value('max_trials', max_trials, int): + self._max_trials = max_trials + + @property + def timeout(self): + return self._timeout + + @timeout.setter + def timeout(self, timeout): + if check_value('timeout', timeout, int): + self._timeout = timeout + + @property + def objective(self): + return self._objective + + @objective.setter + def 
objective(self, objective): + if check_value('objective', objective, str, + ['performance', 'accuracy', 'modelsize', 'footprint']): + self._objective = objective + + @property + def strategy(self): + return self._strategy + + @strategy.setter + def strategy(self, strategy): + if check_value('strategy', strategy, str, + ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + self._strategy = strategy + + @property + def op_name_list(self): + return self._op_name_list + + @op_name_list.setter + def op_name_list(self, op_name_list): + if not isinstance(op_name_list, dict): + logger.warning("Type of op_name_list should be dict but not {}, " \ + "use its default value.".format(type(op_name_list))) + else: + for k, v in op_name_list.items(): + ops_schema.validate(v) + self._op_name_list = op_name_list + + @property + def op_type_list(self): + return self._op_type_list + + @op_type_list.setter + def op_type_list(self, op_type_list): + if not isinstance(op_type_list, dict): + logger.warning("Type of op_type_list should be dict but not {}, " \ + "use its default value.".format(type(op_type_list))) + else: + for k, v in op_type_list.items(): + ops_schema.validate(v) + self._op_type_list = op_type_list + + @property + def calibration_sampling_size(self): + return self._calibration_sampling_size + + @calibration_sampling_size.setter + def calibration_sampling_size(self, sampling_size): + if check_value('calibration_sampling_size', sampling_size, int): + self._calibration_sampling_size = sampling_size + + @property + def device(self): + return self._device + + @device.setter + def device(self, device): + if check_value('device', device, str, ['cpu', 'gpu']): + self._device = device + + @property + def backend(self): + return self._backend + + @backend.setter + def backend(self, backend): + if check_value('backend', backend, str, [ + 'tensorflow', 'tensorflow_itex', 'pytorch', 'pytorch_ipex', 'pytorch_fx', + 'onnxrt_qlinearops', 'onnxrt_integerops', 'onnxrt_qdq', 'onnxrt_qoperator', 'mxnet' + ]): + self._backend = backend + + @property + def outputs(self): + return self._outputs + + @outputs.setter + def outputs(self, outputs): + if check_value('outputs', outputs, str): + self._outputs = outputs + + @property + def inputs(self): + return self._inputs + + @inputs.setter + def inputs(self, inputs): + if check_value('inputs', inputs, str): + self._inputs = inputs + + +class TuningCriterion: + def __init__(self, strategy="basic", timeout=0, max_trials=100, objective="performance"): + self._strategy = strategy + self._timeout = timeout + self._max_trials = max_trials + self._objective = objective + + @property + def max_trials(self): + return self._max_trials + + @max_trials.setter + def max_trials(self, max_trials): + if check_value('max_trials', max_trials, int): + self._max_trials = max_trials + + @property + def timeout(self): + return self._timeout + + @timeout.setter + def timeout(self, timeout): + if check_value('timeout', timeout, int): + self._timeout = timeout + + @property + def objective(self): + return self._objective + + @objective.setter + def objective(self, objective): + if check_value('objective', objective, str, + ['performance', 'accuracy', 'modelsize', 'footprint']): + self._objective = objective + + @property + def strategy(self): + return self._strategy + + @strategy.setter + def strategy(self, strategy): + if check_value('strategy', strategy, str, + ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + self._strategy = strategy + + +tuning_criterion = TuningCriterion() + + +class 
PostTrainingQuantConfig(_BaseQuantizationConfig): + def __init__(self, + device='cpu', + backend="NA", + inputs=[], + outputs=[], + approach='auto', + calibration_sampling_size=[100], + op_type_list=None, + op_name_list=None, + reduce_range=None, + extra_precisions = [], + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + ): + super().__init__(inputs=inputs, + outputs=outputs, + device=device, + backend=backend, + calibration_sampling_size=calibration_sampling_size, + op_type_list=op_type_list, + op_name_list=op_name_list, + strategy=tuning_criterion.strategy, + objective=tuning_criterion.objective, + timeout=tuning_criterion.timeout, + max_trials=tuning_criterion.max_trials, + reduce_range=reduce_range, + extra_precisions=extra_precisions, + accuracy_criterion=accuracy_criterion) + self.approach = approach + + @property + def approach(self): + return self._approach + + @approach.setter + def approach(self, approach): + if check_value("approach", approach, str, ["static", "dynamic", "auto"]): + self._approach = QUANTMAPPING[approach] + + +class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): + def __init__(self, + device="cpu", + backend="NA", + inputs=[], + outputs=[], + op_type_list=None, + op_name_list=None, + reduce_range=None, + extra_precisions=[]): + super().__init__(inputs=inputs, outputs=outputs, device=device, backend=backend, + op_type_list=op_type_list, op_name_list=op_name_list, + reduce_range=reduce_range, extra_precisions=extra_precisions) + self._approach = 'quant_aware_training' + + @property + def approach(self): + return self._approach + + +pruners = [Pruner()] + + +class PruningConfig: + def __init__(self, pruners=pruners, initial_sparsity=0.0, target_sparsity=0.97, + max_sparsity_ratio_per_layer=0.98, prune_type="basic_magnitude", + start_epoch=0, end_epoch=4, start_step=0, end_step=0, update_frequency=1.0, + update_frequency_on_step=1, not_to_prune_names=[], prune_domain="global", + names=[], exclude_names=[], prune_layer_type=[], sparsity_decay_type="exp", + pattern="tile_pattern_1x1"): + self._weight_compression = DotDict({ + 'initial_sparsity': initial_sparsity, + 'target_sparsity': target_sparsity, + 'max_sparsity_ratio_per_layer': max_sparsity_ratio_per_layer, + 'prune_type': prune_type, + 'start_epoch': start_epoch, + 'end_epoch': end_epoch, + 'start_step': start_step, + 'end_step': end_step, + 'update_frequency': update_frequency, + 'update_frequency_on_step': update_frequency_on_step, + 'not_to_prune_names': not_to_prune_names, + 'prune_domain': prune_domain, + 'names': names, + 'exclude_names': exclude_names, + 'prune_layer_type': prune_layer_type, + 'sparsity_decay_type': sparsity_decay_type, + 'pattern': pattern, + 'pruners': pruners + }) + + @property + def weight_compression(self): + return self._weight_compression + + @weight_compression.setter + def weight_compression(self, weight_compression): + self._weight_compression = weight_compression + + +class KnowledgeDistillationLossConfig: + def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], loss_weights=[0.5, 0.5]): + self.config = DotDict({ + 'KnowledgeDistillationLoss': { + 'temperature': temperature, + 'loss_types': loss_types, + 'loss_weights': loss_weights + } + }) + + +class IntermediateLayersKnowledgeDistillationLossConfig: + def __init__(self, layer_mappings=[], loss_types=[], loss_weights=[], add_origin_loss=False): + self.config = DotDict({ + 'IntermediateLayersKnowledgeDistillationLoss': { + 'layer_mappings': layer_mappings, + 'loss_types': 
loss_types, + 'loss_weights': loss_weights, + 'add_origin_loss': add_origin_loss + } + }) + + +class SelfKnowledgeDistillationLossConfig: + def __init__(self, + layer_mappings=[], + temperature=1.0, + loss_types=[], + loss_weights=[], + add_origin_loss=False): + self.config = DotDict({ + 'SelfKnowledgeDistillationLoss': { + 'layer_mappings': layer_mappings, + 'temperature': temperature, + 'loss_types': loss_types, + 'loss_weights': loss_weights, + 'add_origin_loss': add_origin_loss, + } + }) + + +criterion = KnowledgeDistillationLossConfig() + + +class DistillationConfig: + """Config of distillation. + + Args: + + teacher_model (Callable): Teacher model for distillation. Defaults to None. + features (optional): Teacher features for distillation, features and teacher_model are alternative. + Defaults to None. + criterion (Callable, optional): Distillation loss configure. + optimizer (dictionary, optional): Optimizer configure. + """ + + def __init__(self, + teacher_model, + criterion=criterion, + optimizer={'SGD': { + 'learning_rate': 0.0001 + }}): + self._criterion = criterion.config + self._optimizer = optimizer + self._teacher_model = teacher_model + + @property + def criterion(self): + return self._criterion + + @criterion.setter + def criterion(self, criterion): + self._criterion = criterion + + @property + def optimizer(self): + return self._optimizer + + @optimizer.setter + def optimizer(self, optimizer): + self._optimizer = optimizer + + @property + def teacher_model(self): + return self._teacher_model + + @teacher_model.setter + def teacher_model(self, teacher_model): + self._teacher_model = teacher_model + + +class MixedPrecisionConfig(PostTrainingQuantConfig): + def __init__(self, + device="cpu", + backend="NA", + inputs=[], + outputs=[], + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + extra_precisions=["bf16"]): + super().__init__(inputs=inputs, + outputs=outputs, + device=device, + backend=backend, + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + extra_precisions=extra_precisions, + ) + + +class ExportConfig: + def __init__( + self, + dtype="int8", + opset_version=14, + quant_mode="'QDQ'", + sample_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, + ): + self._dtype = dtype + self._opset_version = opset_version + self._quant_mode = quant_mode + self._sample_inputs = sample_inputs + self._input_names = input_names + self._output_names = output_names + self._dynamic_axes = dynamic_axes + self._kwargs = kwargs + + @property + def dtype(self): + return self._dtype + + @dtype.setter + def dtype(self, dtype): + self._dtype = dtype + + @property + def opset_version(self): + return self._opset_version + + @opset_version.setter + def opset_version(self, opset_version): + self._opset_version = opset_version + + @property + def quant_mode(self): + return self._quant_mode + + @quant_mode.setter + def quant_mode(self, quant_mode): + self._quant_mode = quant_mode + + @property + def sample_inputs(self): + return self._sample_inputs + + @sample_inputs.setter + def sample_inputs(self, sample_inputs): + self._sample_inputs = sample_inputs + + @property + def input_names(self): + return self._input_names + + @input_names.setter + def input_names(self, input_names): + self._input_names = input_names + + @property + def output_names(self): + return self._output_names + + @output_names.setter + def output_names(self, output_names): + self._output_names = output_names + + @property + def 
dynamic_axes(self): + return self._dynamic_axes + + @dynamic_axes.setter + def dynamic_axes(self, dynamic_axes): + self._dynamic_axes = dynamic_axes + + +class Torch2ONNXConfig(ExportConfig): + def __init__( + self, + dtype="int8", + opset_version=14, + quant_mode="'QDQ'", + sample_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, + ): + super().__init__( + dtype=dtype, + opset_version=opset_version, + quant_mode=quant_mode, + sample_inputs=sample_inputs, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + kwargs=kwargs, + ) + + +class TF2ONNXConfig(ExportConfig): + def __init__( + self, + dtype="int8", + opset_version=14, + quant_mode="'QDQ'", + sample_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, + ): + super().__init__( + dtype=dtype, + opset_version=opset_version, + quant_mode=quant_mode, + sample_inputs=sample_inputs, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + kwargs=kwargs, + ) + + +def set_random_seed(seed: int): + options.random_seed = seed + + +def set_workspace(workspace: str): + options.workspace = workspace + + +def set_resume_from(resume_from: str): + options.resume_from = resume_from + + +def set_tensorboard(tensorboard: bool): + options.tensorboard = tensorboard diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index 28c790ce7ca..ad9efb470aa 100644 --- a/neural_compressor/experimental/benchmark.py +++ b/neural_compressor/experimental/benchmark.py @@ -570,4 +570,4 @@ def postprocess(self, user_postprocess): def __repr__(self): """Get the object representation in string format.""" - return 'Benchmark' + return 'Benchmark' \ No newline at end of file diff --git a/neural_compressor/experimental/component.py b/neural_compressor/experimental/component.py index 7a3a225b54e..25ab4d4ba93 100644 --- a/neural_compressor/experimental/component.py +++ b/neural_compressor/experimental/component.py @@ -105,14 +105,6 @@ def _init_with_conf(self): logger.error("{}.".format(e)) raise RuntimeError("{} is not correctly installed. " \ "Please check your environment".format(lib)) - if self.framework == 'tensorflow' or self.framework == 'inteltensorflow': - try: - import tensorflow as tf - except Exception as e: - logger.error("{}.".format(e)) - raise RuntimeError( - "The TensorFlow framework is not correctly installed. 
Please check your environment" - ) def prepare(self): """Register Quantization Aware Training hooks.""" @@ -133,7 +125,6 @@ def prepare(self): self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) self.register_hook('on_train_end', self.adaptor._post_hook_for_qat) - def prepare_qat(self): """Register Quantization Aware Training hooks.""" if self.adaptor is None: diff --git a/neural_compressor/experimental/distillation.py b/neural_compressor/experimental/distillation.py index 5afff4138f4..e7fd95f8255 100644 --- a/neural_compressor/experimental/distillation.py +++ b/neural_compressor/experimental/distillation.py @@ -92,6 +92,7 @@ def _on_train_begin(self, dataloader=None): self.best_model = copy.deepcopy(self._model) else: self.best_model = self._model + def _on_step_begin(self, batch_id): """Operations called on the beginning of batches.""" if self.criterion is not None and hasattr(self.criterion, 'clear_features'): @@ -144,7 +145,10 @@ def _on_epoch_end(self): if (isinstance(score, list) and all([s > b_s for s, b_s in zip(score, self.best_score)])) or score > self.best_score: self.best_score = score - self.best_model = copy.deepcopy(self._model._model) + if self.framework == "pytorch": + self.best_model = copy.deepcopy(self._model) + else: + self.best_model = self._model def init_train_cfg(self): """Initialize the training configuration.""" @@ -287,11 +291,7 @@ def execute(self): logger.info("Model distillation is done.") if self._eval_func is not None: logger.info("Start to evaluate the distilled model.") - if self.best_model: - if self.framework == "pytorch": - self._model._model = self.best_model - else: - self._model = self.best_model + self._model = self.best_model if self.best_model else self._model score = self._eval_func( self._model if getattr(self._eval_func, 'builtin', None) else self._model.model ) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 5f0eda5ecf9..3d7b7811ea2 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -390,12 +390,11 @@ def q_func(self): return None @q_func.setter - @deprecated(version='2.0', reason="please use `train_func` instead") def q_func(self, user_q_func): - """Training function for Quantization-Aware Training. + """Calibrate quantization parameters for Post-training static quantization. It is optional and only takes effect when user choose - "quant_aware_training" approach in yaml. + "post_training_static_quant" approach in yaml. Args: user_q_func: This function takes "model" as input parameter diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index ab0a774aad3..f89686887b7 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -17,14 +17,14 @@ from .experimental.mixed_precision import MixedPrecision -from neural_compressor.conf.pythonic_config import Config, MixedPrecisionConfig, Options +from neural_compressor.conf.pythonic_config import Config +from neural_compressor.config import MixedPrecisionConfig def fit(model, config=None, eval_func=None, eval_dataloader=None, eval_metric=None, **kwargs): assert isinstance(config, MixedPrecisionConfig), "Please provide MixedPrecisionConfig!" 
- options = Options() if "options" not in kwargs else kwargs["options"] - conf = Config(quantization=config, options=options) + conf = Config(quantization=config) converter = MixedPrecision(conf) - converter.precisions = config.precisions + converter.precisions = config.extra_precisions converter.model = model if eval_func is not None: converter.eval_func = eval_func diff --git a/neural_compressor/model/base_model.py b/neural_compressor/model/base_model.py index 029723ad821..c42604f96fa 100644 --- a/neural_compressor/model/base_model.py +++ b/neural_compressor/model/base_model.py @@ -42,12 +42,7 @@ def save(self, root, *args, **kwargs): def export( self, save_path: str, - input, - target_model_type: str = 'ONNX', - quant_format: str = 'QDQ', - opset_version: int = 14, - *args, - **kwargs + conf, ): ''' abstract method of model convertion to ONNX''' raise NotImplementedError diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 13629a19038..59a87d51a29 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -949,11 +949,6 @@ def save(self, root=None): f.write(self.graph_def.SerializeToString()) logger.info("Save quantized model to {}.".format(pb_file)) - @abstractmethod - def convert(self, src_type="QDQ", dst_type="TFDO", *args, **kwargs): - ''' abstract method of model saving, Tensorflow model only''' - raise NotImplementedError - class TensorflowSavedModelModel(TensorflowBaseModel): def get_all_weight_names(self): diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index fa09e64e45d..06727a92a0c 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -24,7 +24,6 @@ from ..adaptor.torch_utils.util import input2tuple from neural_compressor.utils.utility import LazyImport, compute_sparsity from neural_compressor.utils import logger -from neural_compressor.conf.dotdict import deep_get, deep_set from neural_compressor.conf import config as cfg from neural_compressor.model.base_model import BaseModel @@ -623,17 +622,11 @@ def export_to_int8_onnx( def export( self, save_path: str, - input, - target_model_type: str = 'ONNX', - quant_mode: str = 'QDQ', - opset_version: int = 14, - *args, - **kwargs + conf, ): - if self.q_config is not None: - assert False, "Unsupport convertion from PyTorch to ONNX" - else: - self.export_to_fp32_onnx(save_path, input, opset_version=opset_version) + # TODO + from neural_compressor.config import Torch2ONNXConfig + pass class PyTorchFXModel(PyTorchModel): diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 025e4c23fa5..272b86fdc0f 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -20,7 +20,8 @@ from .data import DATALOADERS, DATASETS from .experimental import Quantization as ExpQuantization from deprecated import deprecated -from neural_compressor.conf.pythonic_config import Config, PostTrainingConfig +from neural_compressor.conf.pythonic_config import Config +from neural_compressor.config import PostTrainingQuantConfig class Quantization(object): """Quantization class automatically searches for optimal quantization recipes for low @@ -155,7 +156,7 @@ def eval_func(model): self.exp_quantizer.q_func = q_func if eval_func is not None: - self.exp_quantizer.eval_func = eval_func + self.exp_quantizer.eval_func = eval_func elif eval_dataloader is not None: self.exp_quantizer.eval_dataloader = eval_dataloader @@ -197,10 +198,14 @@ def 
postprocess(self, name, postprocess_cls, **kwargs): self.exp_quantizer.postprocess = nc_postprocess -def fit( - model, conf, calib_dataloader=None, calib_func=None, eval_dataloader=None, - eval_func=None, eval_metric=None, options=None, **kwargs -): +def fit(model, + conf, + calib_dataloader=None, + calib_func=None, + eval_dataloader=None, + eval_func=None, + eval_metric=None, + **kwargs): """Quantize the model with a given configure. Args: @@ -256,22 +261,21 @@ def eval_func(model): output = model(input) accuracy = metric(output, label) return accuracy - options (Options, optional): The configure for random_seed, workspace, - resume path and tensorboard flag. """ - if isinstance(conf, PostTrainingConfig): - if options is None: - conf = Config(quantization=conf) - else: - conf = Config(quantization=conf, options=options) + if isinstance(conf, PostTrainingQuantConfig): + if eval_func is None and eval_dataloader is None: + conf.performance_only = True + conf = Config(quantization=conf) quantizer = ExpQuantization(conf) quantizer.model = model if eval_func is not None: quantizer.eval_func = eval_func if calib_dataloader is not None: quantizer.calib_dataloader = calib_dataloader + if calib_func is not None: + quantizer.calib_func = calib_func if eval_dataloader is not None: quantizer.eval_dataloader = eval_dataloader if eval_metric is not None: diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 4cb93e39409..8f0dcecb57e 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -16,8 +16,8 @@ # limitations under the License. import copy -from .conf.pythonic_config import Config, DistillationConfig, Options, \ - PruningConfig, QuantizationAwareTrainingConfig +from .conf.pythonic_config import Config +from .config import DistillationConfig, PruningConfig, QuantizationAwareTrainingConfig from .experimental.distillation import Distillation from .experimental.pruning import Pruning from .experimental.quantization import Quantization @@ -54,8 +54,7 @@ class CompressionManager: compression_manager.save("path_to_save") """ def __init__(self, component): - self.callbacks = \ - component.components[0] if isinstance(component, Scheduler) else component + self.callbacks = self.CallBacks(component) self.model = component.model try: # TODO: export to ONNX model need original fp32 model now, will remove it @@ -65,6 +64,46 @@ def __init__(self, component): logger.warning("Fail to deep copy the model due to {}.".format(repr(e))) self.fp32_model = None + class CallBacks: + def __init__(self, component): + self.callbacks = \ + component.components[0] if isinstance(component, Scheduler) else component + + def on_train_begin(self, dataloader=None): + """ called before the beginning of epochs""" + self.callbacks.on_train_begin(dataloader) + + def on_train_end(self): + """ called after the end of epochs""" + self.callbacks.on_train_end() + + def on_epoch_begin(self, epoch): + """ called on the beginning of epochs""" + self.callbacks.on_epoch_begin(epoch) + + def on_step_begin(self, batch_id): + """ called on the beginning of batches""" + self.callbacks.on_step_begin(batch_id) + + def on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): + """ called on the end of loss computation""" + return self.callbacks.on_after_compute_loss( + input, student_output, student_loss, teacher_output=teacher_output + ) + + def on_before_optimizer_step(self): + """ called on the end of backward""" + self.callbacks.on_before_optimizer_step() + + + def 
on_step_end(self): + """ called on the end of batches""" + return self.callbacks.on_step_end() + + def on_epoch_end(self): + """ called on the end of epochs""" + return self.callbacks.on_epoch_end() + def save(self, root=None): """Save compressed model. @@ -101,7 +140,7 @@ def export( assert False, "Unsupport export for {} model".format(type(self.model)) -def prepare_compression(model: Callable, confs: Union[Callable, List], options=None, **kwargs): +def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs): """_summary_ Args: @@ -135,20 +174,18 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], options=N compression_manager.on_train_end() """ - if options is None: - options = Options() if isinstance(confs, List): from .experimental.scheduler import Scheduler comps = [] for conf in confs: if isinstance(conf, QuantizationAwareTrainingConfig): - conf_ = Config(quantization=conf, options=options) + conf_ = Config(quantization=conf) com = Quantization(conf_) elif isinstance(conf, PruningConfig): - conf_ = Config(pruning=conf, options=options) + conf_ = Config(pruning=conf) com = Pruning(conf_) elif isinstance(conf, DistillationConfig): - conf_ = Config(distillation=conf, options=options) + conf_ = Config(distillation=conf) com = Distillation(conf_) assert conf.teacher_model is not None, \ "Please set teacher_model in DistillationConfig" @@ -165,13 +202,13 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], options=N component = scheduler else: if isinstance(confs, QuantizationAwareTrainingConfig): - conf = Config(quantization=confs, options=options) + conf = Config(quantization=confs) component = Quantization(conf) elif type(confs) == PruningConfig: - conf = Config(pruning=confs, options=options) + conf = Config(pruning=confs) component = Pruning(conf) elif type(confs) == DistillationConfig: - conf = Config(distillation=confs, options=options) + conf = Config(distillation=confs) component = Distillation(conf) assert confs.teacher_model is not None, \ "Please set teacher_model in DistillationConfig" diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch.py deleted file mode 100644 index aeeafd0b660..00000000000 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch.py +++ /dev/null @@ -1,1406 +0,0 @@ -import copy -import neural_compressor.adaptor.pytorch as nc_torch -import numpy as np -import os -import shutil -import torch -import torch.nn as nn -import torch.nn.quantized as nnq -import unittest -import os -from neural_compressor import Options, PostTrainingConfig, QuantizationAwareTrainingConfig -from neural_compressor.conf.config import QuantConf -from neural_compressor.data import DATASETS, DATALOADERS -from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.model import MODELS -from neural_compressor.experimental import Quantization, common -from neural_compressor.experimental.data.datasets.dataset import DATASETS -from neural_compressor import quantization -from neural_compressor.training import prepare_compression -from neural_compressor.utils.pytorch import load -from neural_compressor.utils.utility import recover -from neural_compressor.utils.utility import LazyImport -from torch.quantization import QuantStub, DeQuantStub -from packaging.version import Version - - -# improve lazy import UT coverage -resnet18 = LazyImport("torchvision.models.resnet18") -q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") - -PT_VERSION = 
nc_torch.get_torch_version().release -if PT_VERSION >= Version("1.8.0").release: - FX_MODE = True -else: - FX_MODE = False - - -fake_dyn_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - approach: post_training_dynamic_quant - op_wise: { - "decoder": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - } - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 5 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_ptq_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - op_wise: { - - "layer1.0.conv1": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer1.0.conv2": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer2.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer3.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["kl"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer1.0.add_relu": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 1 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - -fake_auto_yaml = """ - model: - name: imagenet - framework: pytorch_fx - - quantization: - approach: post_training_auto_quant - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 1 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 1000 - max_trials: 3 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_ptq_yaml_for_fx = """ - model: - name: imagenet - framework: pytorch_fx - - quantization: - approach: post_training_auto_quant - op_wise: { - "layer1.0.conv1": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer1.0.conv2": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer2.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer3.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["kl"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer1.0.add_relu": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "conv.module": { - "weight": {"dtype": ["fp32"]}, - "activation": {"dtype": ["fp32"]} - }, - "default_qconfig": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - } - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 5 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_qat_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - approach: quant_aware_training - train: - end_epoch: 1 - 
iteration: 1 - optimizer: - SGD: - learning_rate: 0.0001 - criterion: - CrossEntropyLoss: - reduction: mean - op_wise: { - "layer1.0.conv1": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer1.0.conv2": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - }, - "layer2.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer3.0.conv1": { - "activation": {"dtype": ["uint8"], "algorithm": ["kl"], "granularity": ["per_tensor"], "scheme":["sym"]}, - "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme":["sym"]} - }, - "layer1.0.add_relu": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} - } - } - evaluation: - accuracy: - metric: - topk: 1 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - -dyn_op_name_list = {"decoder": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}} - -ptq_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, -} - -ptq_fx_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "conv.module": { - "weight": { - "dtype": ["fp32"] - }, - "activation": { - "dtype": ["fp32"] - } - }, - "default_qconfig": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - } -} - -qat_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - 
"scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - } -} - - -def build_pytorch_yaml(): - with open("ptq_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_ptq_yaml) - - with open("dynamic_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_dyn_yaml) - - with open("qat_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_qat_yaml) - - with open("auto_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_auto_yaml) - -def build_pytorch_fx_yaml(): - if PT_VERSION >= Version("1.9.0").release: - fake_fx_ptq_yaml = fake_ptq_yaml_for_fx - else: - fake_fx_ptq_yaml = fake_ptq_yaml.replace("pytorch", "pytorch_fx") - with open("fx_ptq_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_ptq_yaml) - - fake_fx_dyn_yaml = fake_dyn_yaml.replace("pytorch", "pytorch_fx") - with open("fx_dynamic_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_dyn_yaml) - - fake_fx_qat_yaml = fake_qat_yaml.replace("pytorch", "pytorch_fx") - with open("fx_qat_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_qat_yaml) - -def build_dump_tensors_yaml(): - fake_yaml = """ - model: - name: imagenet - framework: pytorch - - evaluation: - accuracy: - metric: - topk: 1 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - tensorboard: true - """ - with open("dump_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_yaml) - - -class M(torch.nn.Module): - def __init__(self): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(3, 1, 1) - self.linear = nn.Linear(224 * 224, 5) - self.dequant = DeQuantStub() - - def forward(self, x): - x = self.quant(x) - x = self.conv(x) - x = x.view(1, -1) - x = self.linear(x) - x = self.dequant(x) - return x - - -class FP32Model(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - times = x.size(1) - if times == 1: - return x + x - return x - - -class DynamicModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d(1, 1, 1) - def forward(self, x): - if x is not None: - x = self.conv(x) - return x - - -class SubModel(torch.nn.Module): - def __init__(self, bypass=True): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(1, 1, 1) - self.conv1 = nn.Conv2d(1, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.relu = nn.ReLU() - self.fp32 = FP32Model() - self.norm = nn.LayerNorm([1, 224, 224]) - self.dequant = DeQuantStub() - self.bypass = bypass - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.quant(x) - x = self.relu(x) - x = self.conv1(x) - x = self.dequant(x) - if not self.bypass: - x = self.fp32(x) - x = self.norm(x) - return x - - -class PartialQuantModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(3, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.conv1 = nn.Conv2d(1, 1, 1) - self.bn1 = nn.BatchNorm2d(1) - self.conv2 = nn.Conv2d(1, 1, 1) - self.linear = nn.Linear(224 * 224, 1) - self.dequant = DeQuantStub() - self.sub = 
SubModel(bypass=False) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.conv1(x) - x = self.bn1(x) - x = self.sub(x) - x = self.quant(x) - x = self.conv2(x) - x = x.view(1, -1) - x = self.linear(x) - x = self.dequant(x) - return x - -class DynamicControlModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d(3, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.linear = nn.Linear(224 * 224, 1) - self.sub = SubModel() - self.fp32 = FP32Model() - self.dyn = DynamicModel() - - def forward(self, x): - x = self.conv(x) - x = self.dyn(x) - x = self.bn(x) - x = self.sub(x) - x = self.fp32(x) - x = x.view(1, -1) - x = self.linear(x) - return x - - -class LSTMModel(nn.Module): - """Container module with an encoder, a recurrent module, and a decoder.""" - - def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5): - super(LSTMModel, self).__init__() - self.drop = nn.Dropout(dropout) - self.encoder = nn.Embedding(ntoken, ninp) - self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout) - self.decoder = nn.Linear(nhid, ntoken) - self.init_weights() - self.nhid = nhid - self.nlayers = nlayers - - def init_weights(self): - initrange = 0.1 - self.encoder.weight.data.uniform_(-initrange, initrange) - self.decoder.bias.data.zero_() - self.decoder.weight.data.uniform_(-initrange, initrange) - - def forward(self, input): - input = torch.ones((3, 10), dtype=torch.int32) - h0 = torch.randn(2, 10, 256) - c0 = torch.randn(2, 10, 256) - hidden = (h0, c0) - emb = self.encoder(input) - output, hidden = self.rnn(emb, hidden) - output = self.drop(output) - decoded = self.decoder(output) - return decoded, hidden - - -def eval_func(model): - # switch to evaluate mode - model.eval() - with torch.no_grad(): - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - return 0.0 - - -def train_func(compression_manager, model, dataloader=None): - compression_manager.callbacks.on_train_begin(dataloader=dataloader) - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - # switch to evaluate mode - model.train() - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - loss = output[0].mean() if isinstance(output, tuple) else output.mean() - optimizer.zero_grad() - loss.backward() - optimizer.step() - compression_manager.callbacks.on_train_end() - return model - - -def q_func(model): - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - # switch to evaluate mode - model.train() - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - loss = output.mean() - optimizer.zero_grad() - loss.backward() - optimizer.step() - return model - - -class TestPytorchAdaptor(unittest.TestCase): - # some UT would be affected when IPEX installed. 
- try: - import intel_extension_for_pytorch as ipex - IPEX = True - except: - IPEX = False - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./"} - framework = "pytorch" - adaptor = FRAMEWORKS[framework](framework_specific_info) - model = q_resnet18() - nc_model = MODELS["pytorch"](model) - - @classmethod - def setUpClass(self): - build_pytorch_yaml() - build_dump_tensors_yaml() - - @classmethod - def tearDownClass(self): - os.remove("ptq_yaml.yaml") - os.remove("dynamic_yaml.yaml") - os.remove("qat_yaml.yaml") - os.remove("dump_yaml.yaml") - os.remove("auto_yaml.yaml") - shutil.rmtree("./saved", ignore_errors=True) - shutil.rmtree("runs", ignore_errors=True) - - def test_get_all_weight_name(self): - assert len(list(self.nc_model.get_all_weight_names())) == 62 - - def test_get_weight(self): - for name, param in self.model.named_parameters(): - if name == "layer4.1.conv2.weight": - param.data.fill_(0.0) - if name == "fc.bias": - param.data.fill_(0.1) - assert int(torch.sum(self.nc_model.get_weight("layer4.1.conv2.weight"))) == 0 - assert torch.allclose( - torch.sum( - self.nc_model.get_weight("fc.bias")), - torch.tensor(100.)) - - def test_get_input(self): - model = MODELS["pytorch"](q_resnet18()) - model.model.eval().fuse_model() - model.register_forward_pre_hook() - rand_input = torch.rand(100, 3, 224, 224).float() - model.model(rand_input) - assert torch.equal(model.get_inputs("x"), rand_input) - model.remove_hooks() - - def test_update_weights(self): - self.nc_model.update_weights("fc.bias", torch.zeros([1000])) - assert int(torch.sum(self.nc_model.get_weight("fc.bias"))) == 0 - - def test_get_gradient(self): - with self.assertRaises(AssertionError): - self.nc_model.get_gradient("fc.bias") - - for name, tensor in self.nc_model._model.named_parameters(): - if name == "fc.bias": - tensor.grad = torch.zeros_like(tensor) - break - assert torch.equal(torch.Tensor(self.nc_model.get_gradient("fc.bias")), torch.zeros_like(tensor)) - - rand_input = torch.rand(100, 3, 224, 224).float() - rand_input.grad = torch.ones_like(rand_input) - assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), - torch.ones_like(rand_input)) - - def test_report_sparsity(self): - df, total_sparsity = self.nc_model.report_sparsity() - self.assertTrue(total_sparsity > 0) - self.assertTrue(len(df) == 22) - - def test_quantization_saved(self): - for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: - model = M() - quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - q_model = quantizer.fit() - eval_func(q_model) - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - eval_func(saved_model) - # recover int8 model from history - history_file = "./saved/history.snapshot" - model_recover = recover(model, history_file, 0) - eval_func(model_recover) - self.assertEqual(type(saved_model.conv), \ - type(model_recover.conv)) - shutil.rmtree("./saved", ignore_errors=True) - from neural_compressor.experimental import Benchmark - evaluator = Benchmark("ptq_yaml.yaml") - # Load configure and weights by neural_compressor.model - evaluator.model = model - 
evaluator.b_dataloader = common.DataLoader(dataset) - evaluator.fit("accuracy") - - for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: - model = copy.deepcopy(self.model) - if fake_yaml == "ptq_yaml.yaml": - model.eval().fuse_model() - conf = QuantConf(fake_yaml) - quantizer = Quantization(conf) - dataset = quantizer.dataset("dummy", (100, 3, 224, 224)) - quantizer.model = model - if fake_yaml == "qat_yaml.yaml": - quantizer.q_func = q_func - else: - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - eval_func(saved_model) - shutil.rmtree("./saved", ignore_errors=True) - - def test_quantization_new_saved(self): - for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: - model = M() - quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - q_model = quantizer.fit() - eval_func(q_model) - torch.save(q_model.quantized_state_dict(), "./saved/model.pt") - # Load configure and weights by neural_compressor.utils - from neural_compressor.experimental.common import Model - common_model = Model(model) - common_model.load_quantized_state_dict(torch.load("./saved/model.pt")) - eval_func(common_model) - self.assertEqual(type(q_model._model.linear), \ - type(common_model._model.linear)) - shutil.rmtree("./saved", ignore_errors=True) - - def test_quantization_new_API(self): - for fake_yaml in ["dynamic", "qat", "static"]: - model = M() - if fake_yaml == "qat": - quant_conf = QuantizationAwareTrainingConfig(op_name_list=qat_op_name_list) - compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) - q_model = train_func(compression_manager, compression_manager.model) - else: - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = DATALOADERS["pytorch"](dataset) - if fake_yaml == "dynamic": - quant_conf = PostTrainingConfig(approach="post_training_dynamic_quant", - op_name_list=dyn_op_name_list, - performance_only=True) - elif fake_yaml == "static": - quant_conf = PostTrainingConfig(approach="post_training_static_quant", - op_name_list=ptq_op_name_list, - performance_only=True) - q_model = quantization.fit( - model, - quant_conf, - calib_dataloader=dataloader if fake_yaml == "static" else None, - eval_func=eval_func) - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - shutil.rmtree("./saved", ignore_errors=True) - - @unittest.skipIf(IPEX, "this function is affected by IPEX, Fixing now.") - def test_non_quant_module(self): - for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: - model = PartialQuantModel() - conf = QuantConf(fake_yaml) - quantizer = Quantization(conf) - dataset = quantizer.dataset("dummy", (1, 3, 224, 224)) - non_quant_dict = {"non_quant_module_name": ["conv", "conv1", "sub.conv"], \ - "non_quant_module_class": ["BatchNorm2d", "FP32Model"]} - quantizer.model = common.Model(model, **non_quant_dict) - if fake_yaml == "qat_yaml.yaml": - quantizer.q_func = q_func - else: - quantizer.calib_func = eval_func - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save("./saved") - saved_model = 
load("./saved", model, **non_quant_dict) - eval_func(saved_model) - shutil.rmtree("./saved", ignore_errors=True) - - def test_auto_quant(self): - def eval_func(model): - return 1 - - model_origin = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, - ) - # run fx_quant in neural_compressor and save the quantized GraphModule - quant_conf = PostTrainingConfig(approach="post_training_auto_quant") - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = common.DataLoader(dataset) - model = common.Model(model_origin) - q_model = quantization.fit(model, - quant_conf, - calib_dataloader=dataloader, - eval_func=eval_func) - self.assertNotEqual(q_model, None) - - def test_workspace_path(self): - model = M() - quant_conf = PostTrainingConfig(approach="post_training_static_quant", - op_name_list=ptq_op_name_list, - performance_only=True) - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = common.DataLoader(dataset) - q_model = quantization.fit(model, - quant_conf, - calib_dataloader=dataloader, - eval_func=eval_func) - eval_func(q_model) - os.makedirs("./saved", exist_ok=True) - torch.save(q_model.quantized_state_dict(), "./saved/best_model.pt") - # Load configure and weights by workspace_path - from neural_compressor.experimental.common import Model - common_model = Model(model) - common_model.workspace_path = "./saved" - eval_func(common_model) - self.assertEqual(type(q_model._model.linear), - type(common_model._model.linear)) - shutil.rmtree("./saved", ignore_errors=True) - - def test_get_graph_info(self): - from neural_compressor.model.torch_model import PyTorchModel - model = PyTorchModel(self.model) - op_map = model.graph_info - self.assertTrue(op_map["conv1"] == "Conv2d") - - def test_tensorboard(self): - model = copy.deepcopy(self.nc_model) - model.model.eval().fuse_model() - quant_conf = PostTrainingConfig(approach="post_training_static_quant", - backend="pytorch", - performance_only=True) - options = Options(tensorboard=True) - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = common.DataLoader(dataset) - quantization.fit( - model.model, quant_conf, calib_dataloader=dataloader, - eval_func=eval_func, options=options - ) - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - quantization.fit(model.model, - quant_conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader, - eval_func=None) - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - - def test_tensor_dump_and_set(self): - model = copy.deepcopy(self.nc_model) - model.model.eval().fuse_model() - quantizer = Quantization("ptq_yaml.yaml") - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - dataloader = common.DataLoader(dataset) - dataloader = common._generate_common_dataloader(dataloader, "pytorch") - quantizer.eval_dataloader = dataloader - quantizer.calib_dataloader = dataloader - quantizer.model = model.model - q_model = quantizer.fit() - quantizer.strategy.adaptor.inspect_tensor( - model, dataloader, op_list=["conv1.0", "layer1.0.conv1.0"], - iteration_list=[1, 2], inspect_type="all", save_to_disk=True) - load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k) - a = load_array("saved/dump_tensor/activation_iter1.npz") - w = load_array("saved/dump_tensor/weight.npz") - if PT_VERSION >= Version("1.8.0").release: - self.assertTrue(w["conv1.0"].item()["conv1.0.weight"].shape[0] == - a["conv1.0"].item()["conv1.0.output0"].shape[1]) - else: - 
self.assertTrue(w["conv1.0"].item()["conv1.0.weight"].shape[0] == - a["conv1.0"].item()["conv1.1.output0"].shape[1]) - data = np.random.random(w["conv1.0"].item()["conv1.0.weight"].shape).astype(np.float32) - quantizer.strategy.adaptor.set_tensor(q_model, {"conv1.0.weight": data}) - changed_tensor = q_model.get_weight("conv1.weight") - scales = changed_tensor.q_per_channel_scales() - changed_tensor_fp32 = torch.dequantize(changed_tensor) - self.assertTrue(np.allclose(data, changed_tensor_fp32.numpy(), atol=2 / np.min(scales.numpy()))) - quantizer.strategy.adaptor.inspect_tensor( - q_model, dataloader, op_list=["conv1.0", "layer1.0.conv1.0"], - iteration_list=[1, 2], inspect_type="all", save_to_disk=False) - - def test_get_graph_info(self): - from neural_compressor.adaptor.pytorch import get_ops_recursively - model = copy.deepcopy(self.model) - op_map = {} - get_ops_recursively(model, "", op_map) - self.assertTrue(op_map["conv1"] == "Conv2d") - - def test_forward_wrapper(self): - vision_model = resnet18() - class dummymodel(torch.nn.Module): - def __init__(self, model): - super(dummymodel, self).__init__() - self._model = model - def forward(self,input=None): - return self._model(input) - - data = [[{"input": torch.rand(3,224,224)}, torch.ones(1,1)], ] - # dataloader.batch_size=100 - dataloader = common.DataLoader(data, batch_size=1) - quant_conf = QuantConf("dynamic_yaml.yaml") - model = dummymodel(vision_model) - q_model = quantization.fit(model, - quant_conf, - calib_dataloader=dataloader, - eval_func=eval_func) - - def test_floatfunctions_fallback(self): - class ModelWithFunctionals(torch.nn.Module): - def __init__(self): - super(ModelWithFunctionals, self).__init__() - self.mycat = nnq.FloatFunctional() - self.myadd = nnq.FloatFunctional() - self.myadd_relu = nnq.FloatFunctional() - # Tracing doesnt work yet for c10 ops with scalar inputs - # https://github.com/pytorch/pytorch/issues/27097 - self.my_scalar_add = nnq.FloatFunctional() - self.mymul = nnq.FloatFunctional() - self.my_scalar_mul = nnq.FloatFunctional() - self.quant = QuantStub() - self.dequant = DeQuantStub() - - def forward(self, x): - x = self.quant(x) - y = self.mycat.cat([x, x, x]) - z = self.myadd.add(y, y) - w = self.myadd_relu.add_relu(z, z) - # Tracing doesnt work yet for c10 ops with scalar inputs - # https://github.com/pytorch/pytorch/issues/27097 - w = self.my_scalar_add.add_scalar(w, -0.5) - w = self.mymul.mul(w, w) - w = self.my_scalar_mul.mul_scalar(w, 0.5) - w = self.dequant(w) - return w - - model = ModelWithFunctionals() - model = MODELS["pytorch"](model) - x = torch.rand(10, 1, dtype=torch.float) - y = model.model(x) - fallback_ops = [] - q_capability = self.adaptor.query_fw_capability(model) - for k, v in q_capability["opwise"].items(): - if k[0] != "quant" and k[0] != "dequant": - fallback_ops.append(k[0]) - model.model.qconfig = torch.quantization.default_qconfig - model.model.quant.qconfig = torch.quantization.default_qconfig - if PT_VERSION >= Version("1.8.0").release: - model.model.dequant.qconfig = torch.quantization.default_qconfig - nc_torch._fallback_quantizable_ops_recursively( - model.model, "", fallback_ops, op_qcfgs={}) - torch.quantization.add_observer_(model.model) - model.model(x) - torch.quantization.convert(model.model, self.adaptor.q_mapping, inplace=True) - qy = model.model(x) - tol = {"atol": 1e-01, "rtol": 1e-03} - self.assertTrue(np.allclose(y, qy, **tol)) - - -@unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") -class 
TestPytorchFXAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./"} - framework = "pytorch_fx" - adaptor = FRAMEWORKS[framework](framework_specific_info) - @classmethod - def setUpClass(self): - build_pytorch_fx_yaml() - - @classmethod - def tearDownClass(self): - os.remove("fx_ptq_yaml.yaml") - os.remove("fx_dynamic_yaml.yaml") - shutil.rmtree("./saved", ignore_errors=True) - shutil.rmtree("runs", ignore_errors=True) - - def test_fx_quant(self): - for fake_yaml in ["qat", "static"]: - model_origin = resnet18() - model = common.Model(model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - } - ) - dataset = DATASETS("pytorch")["dummy"]((10, 3, 224, 224), label=True) - dataloader = DATALOADERS["pytorch"](dataset) - if fake_yaml == "qat": - conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) - else: - conf = PostTrainingConfig( - op_name_list=ptq_fx_op_name_list, backend="pytorch_fx", performance_only=True - ) - options = Options(workspace="./saved") - q_model = quantization.fit(model, - conf, - calib_dataloader=dataloader, - eval_func=eval_func, - calib_func=eval_func, - options=options) - q_model.save("./saved") - # Load configure and weights with neural_compressor.utils - model_fx = load("./saved", model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []}, \ - "dataloader": torch.utils.data.DataLoader(dataset) - }) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - - if fake_yaml != "qat": - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover(model_origin, history_file, 0, - **{"prepare_custom_config_dict": - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": - {"preserved_attributes": []}, - "dataloader": dataloader - }) - self.assertEqual(model_fx.code, model_fx_recover.code) - shutil.rmtree("./saved", ignore_errors=True) - for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: - model_origin = M() - # run fx_quant in neural_compressor and save the quantized GraphModule - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224), label=True) - dataloader = DATALOADERS["pytorch"](dataset) - model = common.Model(model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - if fake_yaml == "fx_qat_yaml.yaml": - conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) - compression_manager.save("./saved") - else: - conf = PostTrainingConfig( - op_name_list=ptq_fx_op_name_list, backend="pytorch_fx", performance_only=True - ) - q_model = quantization.fit(model, - conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader) - q_model.save("./saved") - # Load configure and weights with neural_compressor.utils - model_fx = load("./saved", model_origin, - 
**{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []}, \ - "dataloader": torch.utils.data.DataLoader(dataset) - }) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - shutil.rmtree("./saved", ignore_errors=True) - - @unittest.skipIf(PT_VERSION < Version("1.9.0").release, - "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") - def test_fx_dynamic_quant(self): - origin_model = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 5, - ) - # run fx_quant in neural_compressor and save the quantized GraphModule - origin_model.eval() - quant_conf = QuantConf("fx_dynamic_yaml.yaml") - model = common.Model(origin_model, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - q_model = quantization.fit(model, - quant_conf - ) - q_model.save("./saved") - - # Load configure and weights by neural_compressor.utils - model_fx = load("./saved", origin_model, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - - # Test the functionality of older model saving type - state_dict = torch.load("./saved/best_model.pt") - tune_cfg = state_dict.pop("best_configure") - import yaml - with open("./saved/best_configure.yaml", "w") as f: - yaml.dump(tune_cfg, f, default_flow_style=False) - torch.save(state_dict, "./saved/best_model_weights.pt") - os.remove("./saved/best_model.pt") - model_fx = load("./saved", origin_model, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover(origin_model, history_file, 0, - **{"prepare_custom_config_dict": - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": - {"preserved_attributes": []} - }) - self.assertEqual(model_fx.code, model_fx_recover.code) - shutil.rmtree("./saved", ignore_errors=True) - - def test_default_dynamic_quant(self): - def eval_func(model): - return 1 - - # Model Definition - for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: - model_origin = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, - ) - dataset = DATASETS("pytorch")["dummy"]((3, 10)) - dataloader = DATALOADERS["pytorch"](dataset) - # run fx_quant in neural_compressor and save the quantized GraphModule - if fake_yaml == "fx_qat_yaml.yaml": - conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader=dataloader) - self.assertTrue("quantize" in str(type(q_model.model.encoder))) - self.assertTrue("quantize" in str(type(q_model.model.rnn))) - else: - conf = PostTrainingConfig(backend="pytorch_fx", performance_only=True) - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - eval_func=eval_func) - self.assertTrue("quantize" in str(type(q_model.model.encoder))) - self.assertTrue("quantize" in str(type(q_model.model.rnn))) - - def 
test_fx_sub_module_quant(self): - for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml", "fx_dynamic_yaml.yaml"]: - model_origin = DynamicControlModel() - model = common.Model(model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - dataset = DATASETS("pytorch")["dummy"]((1, 3, 224, 224)) - dataloader = DATALOADERS["pytorch"](dataset) - # run fx_quant in neural_compressor and save the quantized GraphModule - if fake_yaml == "fx_qat_yaml.yaml": - conf = QuantizationAwareTrainingConfig( - op_name_list=qat_op_name_list, backend="pytorch_fx" - ) - compression_manager = prepare_compression(copy.deepcopy(model), conf) - q_model = train_func(compression_manager, compression_manager.model, dataloader) - else: - options = Options(workspace="./saved") - conf = PostTrainingConfig(backend="pytorch_fx", performance_only=True) - q_model = quantization.fit(model, - conf, - calib_dataloader=dataloader, - eval_func=eval_func, - options=options) - q_model.save("./saved") - # Load configure and weights with neural_compressor.utils - model_fx = load("./saved/best_model.pt", model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []}, \ - "dataloader": torch.utils.data.DataLoader(dataset) - }) - self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) - - if fake_yaml != "fx_qat_yaml.yaml": - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover(model_origin, history_file, 0, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []}, \ - "dataloader": torch.utils.data.DataLoader(dataset) - }) - self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) - shutil.rmtree("./saved", ignore_errors=True) - - def test_deepcopy_failure(self): - def eval_func(model): - return 1 - - # To build an object t2, which will fail on deepcopy. 
- class T1(): - def __init__(self, t1) -> None: - self.t1 = t1 - self.j = 1 - - # required for usage with set in T1 - def __hash__(self): - return hash(self.j) - - t1 = set() - t2 = T1([t1]) - t1.add(t2) - - for fake_yaml in ['fx_ptq_yaml.yaml']: - model_origin = M() - model_origin.tmp = t2 - # run fx_quant in neural_compressor and save the quantized GraphModule - quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) - quantizer.eval_func = eval_func - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin) - q_model = quantizer.fit() - self.assertTrue(isinstance(q_model.model, torch.fx.graph_module.GraphModule)) - - @unittest.skipIf(PT_VERSION < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") - def test_bf16_capability(self): - model_origin = DynamicControlModel() - os.environ["FORCE_BF16"] = "1" - q_capability = self.adaptor._get_quantizable_ops(model_origin) - del os.environ["FORCE_BF16"] - - self.assertEqual( - [elem["weight"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], - [["int8"], "fp32"]) - self.assertEqual( - [elem["activation"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], - [["uint8"], "fp32"]) - self.assertEqual( - [elem["weight"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], - [["int8"], "fp32"]) - self.assertEqual( - [elem["activation"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], - [["uint8"], "fp32"]) - self.assertEqual( - [elem["weight"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]], - [["int8"], "fp32", "bf16"]) - self.assertEqual( - [elem["activation"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]], - [["uint8"], "fp32", "bf16"]) - - @unittest.skipIf(PT_VERSION < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") - def test_mix_precision(self): - model_origin = DynamicControlModel() - # run fx_quant in neural_compressor and save the quantized GraphModule - dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) - dataloader = DATALOADERS["pytorch"](dataset) - model = common.Model(model_origin, - **{"prepare_custom_config_dict": \ - {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": \ - {"preserved_attributes": []} - }) - options = Options(workspace="./saved") - conf = PostTrainingConfig(op_name_list=ptq_fx_op_name_list, backend="pytorch_fx", performance_only=True) - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - eval_func=eval_func, - calib_func = eval_func, - options=options) - tune_cfg = q_model.q_config - tune_cfg["op"][("conv.module", "Conv2d")].clear() - tune_cfg["op"][("conv.module", "Conv2d")] = \ - {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} - tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d")) - from neural_compressor.adaptor.torch_utils.bf16_convert import Convert - q_model._model = Convert(q_model._model, tune_cfg) - - self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16) - self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16) - - def test_symbolic_trace(self): - from neural_compressor.adaptor.torch_utils.symbolic_trace import symbolic_trace - model_origin = DynamicControlModel() - traced_model = symbolic_trace(model_origin, is_qat=False) - if PT_VERSION >= 
Version("1.11.0").release: - self.assertTrue(isinstance(traced_model.sub, torch.nn.Module)) - self.assertTrue(isinstance(traced_model.conv, torch.fx.graph_module.GraphModule)) - else: - self.assertTrue(isinstance(traced_model.sub, torch.fx.graph_module.GraphModule)) - traced_model_qat = symbolic_trace(model_origin, is_qat=True) - self.assertTrue(isinstance(traced_model_qat.sub, torch.fx.graph_module.GraphModule)) - -if __name__ == "__main__": - unittest.main() diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py new file mode 100644 index 00000000000..effd890bdd7 --- /dev/null +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py @@ -0,0 +1,1118 @@ +import copy +import neural_compressor.adaptor.pytorch as nc_torch +import numpy as np +import os +import shutil +import torch +import torch.nn as nn +import torch.nn.quantized as nnq +import unittest +from neural_compressor.adaptor import FRAMEWORKS +from neural_compressor.model import MODELS +from neural_compressor.experimental import Quantization, common +from neural_compressor.conf.config import QuantConf +from neural_compressor.utils.pytorch import load +from neural_compressor.utils.utility import recover +from neural_compressor.utils.utility import LazyImport +from torch.quantization import QuantStub, DeQuantStub +from packaging.version import Version +try: + import intel_extension_for_pytorch as ipex + IPEX = True +except: + IPEX = False + +# improve lazy import UT coverage +resnet18 = LazyImport("torchvision.models.resnet18") +q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") + +PT_VERSION = nc_torch.get_torch_version().release +if PT_VERSION >= Version("1.8.0").release: + FX_MODE = True +else: + FX_MODE = False + + +fake_dyn_yaml = ''' + model: + name: imagenet + framework: pytorch + + quantization: + approach: post_training_dynamic_quant + op_wise: { + 'decoder': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 5 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + ''' + + +fake_ptq_yaml = ''' + model: + name: imagenet + framework: pytorch + + quantization: + op_wise: { + + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 1 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + ''' + +fake_auto_yaml = ''' + model: + name: imagenet + framework: pytorch_fx + + quantization: + approach: post_training_auto_quant + evaluation: + accuracy: + metric: + topk: 1 + performance: + 
warmup: 1 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 1000 + max_trials: 3 + random_seed: 9527 + workspace: + path: saved + ''' + + +fake_ptq_yaml_for_fx = ''' + model: + name: imagenet + framework: pytorch_fx + + quantization: + approach: post_training_auto_quant + op_wise: { + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'conv.module': { + 'weight': {'dtype': ['fp32']}, + 'activation': {'dtype': ['fp32']} + }, + 'default_qconfig': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 5 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + ''' + + +fake_qat_yaml = ''' + model: + name: imagenet + framework: pytorch + + quantization: + approach: quant_aware_training + train: + end_epoch: 1 + iteration: 1 + optimizer: + SGD: + learning_rate: 0.0001 + criterion: + CrossEntropyLoss: + reduction: mean + op_wise: { + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + ''' + + +def build_pytorch_yaml(): + with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_ptq_yaml) + + with open('dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_dyn_yaml) + + with open('qat_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_qat_yaml) + + with open('auto_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_auto_yaml) + +def build_pytorch_fx_yaml(): + if PT_VERSION >= Version("1.9.0").release: + fake_fx_ptq_yaml = fake_ptq_yaml_for_fx + else: + fake_fx_ptq_yaml = fake_ptq_yaml.replace('pytorch', 'pytorch_fx') + with open('fx_ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_fx_ptq_yaml) + + fake_fx_dyn_yaml = fake_dyn_yaml.replace('pytorch', 'pytorch_fx') + with open('fx_dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + 
f.write(fake_fx_dyn_yaml) + + fake_fx_qat_yaml = fake_qat_yaml.replace('pytorch', 'pytorch_fx') + with open('fx_qat_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_fx_qat_yaml) + +def build_dump_tensors_yaml(): + fake_yaml = ''' + model: + name: imagenet + framework: pytorch + + evaluation: + accuracy: + metric: + topk: 1 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + tensorboard: true + ''' + with open('dump_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + + +class M(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.linear = nn.Linear(224 * 224, 5) + self.dequant = DeQuantStub() + + def forward(self, x): + x = self.quant(x) + x = self.conv(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + + +class FP32Model(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + times = x.size(1) + if times == 1: + return x + x + return x + + +class DynamicModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(1, 1, 1) + def forward(self, x): + if x is not None: + x = self.conv(x) + return x + + +class SubModel(torch.nn.Module): + def __init__(self, bypass=True): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(1, 1, 1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.relu = nn.ReLU() + self.fp32 = FP32Model() + self.norm = nn.LayerNorm([1, 224, 224]) + self.dequant = DeQuantStub() + self.bypass = bypass + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.quant(x) + x = self.relu(x) + x = self.conv1(x) + x = self.dequant(x) + if not self.bypass: + x = self.fp32(x) + x = self.norm(x) + return x + + +class PartialQuantModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn1 = nn.BatchNorm2d(1) + self.conv2 = nn.Conv2d(1, 1, 1) + self.linear = nn.Linear(224 * 224, 1) + self.dequant = DeQuantStub() + self.sub = SubModel(bypass=False) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.conv1(x) + x = self.bn1(x) + x = self.sub(x) + x = self.quant(x) + x = self.conv2(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + +class DynamicControlModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(3, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.linear = nn.Linear(224 * 224, 1) + self.sub = SubModel() + self.fp32 = FP32Model() + self.dyn = DynamicModel() + + def forward(self, x): + x = self.conv(x) + x = self.dyn(x) + x = self.bn(x) + x = self.sub(x) + x = self.fp32(x) + x = x.view(1, -1) + x = self.linear(x) + return x + + +class LSTMModel(nn.Module): + '''Container module with an encoder, a recurrent module, and a decoder.''' + + def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5): + super(LSTMModel, self).__init__() + self.drop = nn.Dropout(dropout) + self.encoder = nn.Embedding(ntoken, ninp) + self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout) + self.decoder = nn.Linear(nhid, ntoken) + self.init_weights() + self.nhid = nhid + self.nlayers = nlayers + + def init_weights(self): + initrange = 0.1 + self.encoder.weight.data.uniform_(-initrange, initrange) + self.decoder.bias.data.zero_() + 
self.decoder.weight.data.uniform_(-initrange, initrange) + + def forward(self, input): + input = torch.ones((3, 10), dtype=torch.int32) + h0 = torch.randn(2, 10, 256) + c0 = torch.randn(2, 10, 256) + hidden = (h0, c0) + emb = self.encoder(input) + output, hidden = self.rnn(emb, hidden) + output = self.drop(output) + decoded = self.decoder(output) + return decoded, hidden + + +def eval_func(model): + # switch to evaluate mode + model.eval() + with torch.no_grad(): + input = torch.randn(1, 3, 224, 224) + # compute output + output = model(input) + return 0.0 + + +def q_func(model): + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + # switch to evaluate mode + model.train() + input = torch.randn(1, 3, 224, 224) + # compute output + output = model(input) + loss = output.mean() + optimizer.zero_grad() + loss.backward() + optimizer.step() + return model + + +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./"} + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = q_resnet18() + nc_model = MODELS['pytorch'](model) + + @classmethod + def setUpClass(self): + build_pytorch_yaml() + build_dump_tensors_yaml() + + @classmethod + def tearDownClass(self): + os.remove('ptq_yaml.yaml') + os.remove('dynamic_yaml.yaml') + os.remove('qat_yaml.yaml') + os.remove('dump_yaml.yaml') + os.remove('auto_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + + def test_get_all_weight_name(self): + assert len(list(self.nc_model.get_all_weight_names())) == 62 + + def test_get_weight(self): + for name, param in self.model.named_parameters(): + if name == "layer4.1.conv2.weight": + param.data.fill_(0.0) + if name == "fc.bias": + param.data.fill_(0.1) + assert int(torch.sum(self.nc_model.get_weight("layer4.1.conv2.weight"))) == 0 + assert torch.allclose( + torch.sum( + self.nc_model.get_weight("fc.bias")), + torch.tensor(100.)) + + def test_get_input(self): + model = MODELS['pytorch'](q_resnet18()) + model.model.eval().fuse_model() + model.register_forward_pre_hook() + rand_input = torch.rand(100, 3, 224, 224).float() + model.model(rand_input) + assert torch.equal(model.get_inputs('x'), rand_input) + model.remove_hooks() + + def test_update_weights(self): + self.nc_model.update_weights('fc.bias', torch.zeros([1000])) + assert int(torch.sum(self.nc_model.get_weight("fc.bias"))) == 0 + + def test_get_gradient(self): + with self.assertRaises(AssertionError): + self.nc_model.get_gradient('fc.bias') + + for name, tensor in self.nc_model._model.named_parameters(): + if name == 'fc.bias': + tensor.grad = torch.zeros_like(tensor) + break + assert torch.equal(torch.Tensor(self.nc_model.get_gradient('fc.bias')), torch.zeros_like(tensor)) + + rand_input = torch.rand(100, 3, 224, 224).float() + rand_input.grad = torch.ones_like(rand_input) + assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), + torch.ones_like(rand_input)) + + def test_report_sparsity(self): + df, total_sparsity = self.nc_model.report_sparsity() + self.assertTrue(total_sparsity > 0) + self.assertTrue(len(df) == 22) + + def test_quantization_saved(self): + for fake_yaml in ['dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml']: + model = M() + quantizer = Quantization(fake_yaml) + quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True + dataset = quantizer.dataset('dummy', (100, 3, 
224, 224), label=True) + quantizer.model = model + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + q_model = quantizer.fit() + eval_func(q_model) + q_model.save('./saved') + # Load configure and weights by neural_compressor.utils + saved_model = load("./saved", model) + eval_func(saved_model) + # recover int8 model from history + history_file = './saved/history.snapshot' + model_recover = recover(model, history_file, 0) + eval_func(model_recover) + self.assertEqual(type(saved_model.conv), \ + type(model_recover.conv)) + shutil.rmtree('./saved', ignore_errors=True) + from neural_compressor.experimental import Benchmark + evaluator = Benchmark('ptq_yaml.yaml') + # Load configure and weights by neural_compressor.model + evaluator.model = model + evaluator.b_dataloader = common.DataLoader(dataset) + evaluator.fit('accuracy') + + for fake_yaml in ['qat_yaml.yaml', 'ptq_yaml.yaml']: + model = copy.deepcopy(self.model) + if fake_yaml == 'ptq_yaml.yaml': + model.eval().fuse_model() + conf = QuantConf(fake_yaml) + quantizer = Quantization(conf) + dataset = quantizer.dataset('dummy', (100, 3, 224, 224)) + quantizer.model = model + if fake_yaml == 'qat_yaml.yaml': + quantizer.q_func = q_func + else: + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_func = eval_func + q_model = quantizer.fit() + q_model.save('./saved') + # Load configure and weights by neural_compressor.utils + saved_model = load("./saved", model) + eval_func(saved_model) + shutil.rmtree('./saved', ignore_errors=True) + + def test_quantization_new_saved(self): + for fake_yaml in ['dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml']: + model = M() + quantizer = Quantization(fake_yaml) + quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True + dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.model = model + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + q_model = quantizer.fit() + eval_func(q_model) + torch.save(q_model.quantized_state_dict(), './saved/model.pt') + # Load configure and weights by neural_compressor.utils + from neural_compressor.experimental.common import Model + common_model = Model(model) + common_model.load_quantized_state_dict(torch.load('./saved/model.pt')) + eval_func(common_model) + self.assertEqual(type(q_model._model.linear), \ + type(common_model._model.linear)) + shutil.rmtree('./saved', ignore_errors=True) + + @unittest.skipIf(IPEX, "this function is affected by IPEX, Fixing now.") + def test_non_quant_module(self): + for fake_yaml in ['qat_yaml.yaml', 'ptq_yaml.yaml']: + model = PartialQuantModel() + conf = QuantConf(fake_yaml) + quantizer = Quantization(conf) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224)) + non_quant_dict = {'non_quant_module_name': ['conv', 'conv1', 'sub.conv'], \ + 'non_quant_module_class': ['BatchNorm2d', 'FP32Model']} + quantizer.model = common.Model(model, **non_quant_dict) + if fake_yaml == 'qat_yaml.yaml': + quantizer.q_func = q_func + else: + quantizer.calib_func = eval_func + quantizer.eval_func = eval_func + q_model = quantizer.fit() + q_model.save('./saved') + saved_model = load("./saved", model, **non_quant_dict) + eval_func(saved_model) + shutil.rmtree('./saved', ignore_errors=True) + + def test_auto_quant(self): + def eval_func(model): + return 1 + + model_origin = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 2, + ) + # run fx_quant in 
neural_compressor and save the quantized GraphModule + quantizer = Quantization('auto_yaml.yaml') + dataset = quantizer.dataset('dummy', (3, 10), label=True) + quantizer.eval_func = eval_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin) + q_model = quantizer.fit() + self.assertNotEqual(q_model, None) + + def test_workspace_path(self): + model = M() + quantizer = Quantization('ptq_yaml.yaml') + quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True + dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.model = model + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + q_model = quantizer.fit() + eval_func(q_model) + torch.save(q_model.quantized_state_dict(), './saved/best_model.pt') + # Load configure and weights by workspace_path + from neural_compressor.experimental.common import Model + common_model = Model(model) + common_model.workspace_path = './saved' + eval_func(common_model) + self.assertEqual(type(q_model._model.linear), \ + type(common_model._model.linear)) + shutil.rmtree('./saved', ignore_errors=True) + + def test_get_graph_info(self): + from neural_compressor.model.torch_model import PyTorchModel + model = PyTorchModel(self.model) + op_map = model.graph_info + self.assertTrue(op_map['conv1'] == 'Conv2d') + + def test_tensorboard(self): + model = copy.deepcopy(self.nc_model) + model.model.eval().fuse_model() + quantizer = Quantization('dump_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.model = model.model + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_func = eval_func + quantizer.fit() + self.assertTrue(True if os.path.exists('runs/eval/baseline_acc0.0') else False) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.eval_func = None + quantizer.fit() + self.assertTrue(True if os.path.exists('runs/eval/baseline_acc0.0') else False) + + def test_tensor_dump_and_set(self): + model = copy.deepcopy(self.nc_model) + model.model.eval().fuse_model() + quantizer = Quantization('ptq_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + dataloader = common.DataLoader(dataset) + dataloader = common._generate_common_dataloader(dataloader, 'pytorch') + quantizer.eval_dataloader = dataloader + quantizer.calib_dataloader = dataloader + quantizer.model = model.model + q_model = quantizer.fit() + quantizer.strategy.adaptor.inspect_tensor( + model, dataloader, op_list=['conv1.0', 'layer1.0.conv1.0'], + iteration_list=[1, 2], inspect_type='all', save_to_disk=True) + load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k) + a = load_array('saved/dump_tensor/activation_iter1.npz') + w = load_array('saved/dump_tensor/weight.npz') + if PT_VERSION >= Version("1.8.0").release: + self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] == + a['conv1.0'].item()['conv1.0.output0'].shape[1]) + else: + self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] == + a['conv1.0'].item()['conv1.1.output0'].shape[1]) + data = np.random.random(w['conv1.0'].item()['conv1.0.weight'].shape).astype(np.float32) + quantizer.strategy.adaptor.set_tensor(q_model, {'conv1.0.weight': data}) + changed_tensor = q_model.get_weight('conv1.weight') + scales = changed_tensor.q_per_channel_scales() + changed_tensor_fp32 = torch.dequantize(changed_tensor) + self.assertTrue(np.allclose(data, changed_tensor_fp32.numpy(), atol=2 
/ np.min(scales.numpy()))) + quantizer.strategy.adaptor.inspect_tensor( + q_model, dataloader, op_list=['conv1.0', 'layer1.0.conv1.0'], + iteration_list=[1, 2], inspect_type='all', save_to_disk=False) + + def test_get_graph_info(self): + from neural_compressor.adaptor.pytorch import get_ops_recursively + model = copy.deepcopy(self.model) + op_map = {} + get_ops_recursively(model, '', op_map) + self.assertTrue(op_map['conv1'] == 'Conv2d') + + def test_forward_wrapper(self): + vision_model = resnet18() + class dummymodel(torch.nn.Module): + def __init__(self, model): + super(dummymodel, self).__init__() + self._model = model + def forward(self,input=None): + return self._model(input) + + data = [[{'input': torch.rand(3,224,224)}, torch.ones(1,1)], ] + # dataloader.batch_size=100 + dataloader = common.DataLoader(data, batch_size=1) + quantizer = Quantization('dynamic_yaml.yaml') + model = dummymodel(vision_model) + quantizer.model = model + quantizer.calib_dataloader = dataloader + quantizer.eval_dataloader = dataloader + quantizer.fit() + + def test_floatfunctions_fallback(self): + class ModelWithFunctionals(torch.nn.Module): + def __init__(self): + super(ModelWithFunctionals, self).__init__() + self.mycat = nnq.FloatFunctional() + self.myadd = nnq.FloatFunctional() + self.myadd_relu = nnq.FloatFunctional() + # Tracing doesnt work yet for c10 ops with scalar inputs + # https://github.com/pytorch/pytorch/issues/27097 + self.my_scalar_add = nnq.FloatFunctional() + self.mymul = nnq.FloatFunctional() + self.my_scalar_mul = nnq.FloatFunctional() + self.quant = QuantStub() + self.dequant = DeQuantStub() + + def forward(self, x): + x = self.quant(x) + y = self.mycat.cat([x, x, x]) + z = self.myadd.add(y, y) + w = self.myadd_relu.add_relu(z, z) + # Tracing doesnt work yet for c10 ops with scalar inputs + # https://github.com/pytorch/pytorch/issues/27097 + w = self.my_scalar_add.add_scalar(w, -0.5) + w = self.mymul.mul(w, w) + w = self.my_scalar_mul.mul_scalar(w, 0.5) + w = self.dequant(w) + return w + + model = ModelWithFunctionals() + model = MODELS['pytorch'](model) + x = torch.rand(10, 1, dtype=torch.float) + y = model.model(x) + fallback_ops = [] + q_capability = self.adaptor.query_fw_capability(model) + for k, v in q_capability["opwise"].items(): + if k[0] != "quant" and k[0] != "dequant": + fallback_ops.append(k[0]) + model.model.qconfig = torch.quantization.default_qconfig + model.model.quant.qconfig = torch.quantization.default_qconfig + if PT_VERSION >= Version("1.8.0").release: + model.model.dequant.qconfig = torch.quantization.default_qconfig + nc_torch._fallback_quantizable_ops_recursively( + model.model, '', fallback_ops, op_qcfgs={}) + torch.quantization.add_observer_(model.model) + model.model(x) + torch.quantization.convert(model.model, self.adaptor.q_mapping, inplace=True) + qy = model.model(x) + tol = {'atol': 1e-01, 'rtol': 1e-03} + self.assertTrue(np.allclose(y, qy, **tol)) + +@unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") +class TestPytorchFXAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./"} + framework = "pytorch_fx" + adaptor = FRAMEWORKS[framework](framework_specific_info) + @classmethod + def setUpClass(self): + build_pytorch_fx_yaml() + + @classmethod + def tearDownClass(self): + os.remove('fx_ptq_yaml.yaml') + os.remove('fx_dynamic_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + 
shutil.rmtree('runs', ignore_errors=True) + + def test_fx_quant(self): + for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + model_origin = resnet18() + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (10, 3, 224, 224), label=True) + quantizer.eval_func = eval_func + if fake_yaml == 'fx_qat_yaml.yaml': + quantizer.q_func = q_func + else: + quantizer.calib_func = eval_func + dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = dataloader + quantizer.model = common.Model(model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + q_model = quantizer.fit() + q_model.save('./saved') + # Load configure and weights with neural_compressor.utils + model_fx = load('./saved', model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []}, \ + 'dataloader': quantizer.calib_dataloader + }) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = './saved/history.snapshot' + model_fx_recover = recover(model_origin, history_file, 0, + **{'prepare_custom_config_dict': + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': + {'preserved_attributes': []}, + 'dataloader': quantizer.calib_dataloader + }) + self.assertEqual(model_fx.code, model_fx_recover.code) + shutil.rmtree('./saved', ignore_errors=True) + + for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + model_origin = M() + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True + dataset = quantizer.dataset('dummy', (10, 3, 224, 224), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + q_model = quantizer.fit() + q_model.save('./saved') + # Load configure and weights with neural_compressor.utils + model_fx = load('./saved', model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []}, \ + 'dataloader': quantizer.calib_dataloader + }) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + shutil.rmtree('./saved', ignore_errors=True) + + @unittest.skipIf(PT_VERSION < Version("1.9.0").release, + "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") + def test_fx_dynamic_quant(self): + model = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 5, + ) + # run fx_quant in neural_compressor and save the quantized GraphModule + model.eval() + quantizer = Quantization('fx_dynamic_yaml.yaml') + quantizer.model = common.Model(model, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + q_model = quantizer.fit() + q_model.save('./saved') + + # Load configure and weights by neural_compressor.utils + model_fx = load("./saved", model, + **{'prepare_custom_config_dict': \ + 
{'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # Test the functionality of older model saving type + state_dict = torch.load("./saved/best_model.pt") + tune_cfg = state_dict.pop('best_configure') + import yaml + with open("./saved/best_configure.yaml", 'w') as f: + yaml.dump(tune_cfg, f, default_flow_style=False) + torch.save(state_dict, "./saved/best_model_weights.pt") + os.remove('./saved/best_model.pt') + model_fx = load("./saved", model, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = './saved/history.snapshot' + model_fx_recover = recover(model, history_file, 0, + **{'prepare_custom_config_dict': + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': + {'preserved_attributes': []} + }) + self.assertEqual(model_fx.code, model_fx_recover.code) + shutil.rmtree('./saved', ignore_errors=True) + + def test_default_dynamic_quant(self): + def eval_func(model): + return 1 + + def q_func(model): + return model + + # Model Definition + for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + model_origin = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 2, + ) + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (3, 10), label=True) + quantizer.eval_func = eval_func + if fake_yaml == 'fx_qat_yaml.yaml': + quantizer.q_func = q_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin) + q_model = quantizer.fit() + self.assertTrue('quantize' in str(type(q_model.model.encoder))) + self.assertTrue('quantize' in str(type(q_model.model.rnn))) + + def test_fx_sub_module_quant(self): + for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml', 'fx_dynamic_yaml.yaml']: + model_origin = DynamicControlModel() + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.eval_func = eval_func + if fake_yaml == 'fx_qat_yaml.yaml': + quantizer.q_func = q_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + q_model = quantizer.fit() + q_model.save('./saved') + # Load configure and weights with neural_compressor.utils + model_fx = load('./saved/best_model.pt', model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []}, \ + 'dataloader': quantizer.calib_dataloader + }) + self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = './saved/history.snapshot' + model_fx_recover = recover(model_origin, history_file, 0, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []}, \ + 'dataloader': quantizer.calib_dataloader + }) + self.assertEqual(model_fx.sub.code, 
model_fx_recover.sub.code) + shutil.rmtree('./saved', ignore_errors=True) + + def test_deepcopy_failure(self): + def eval_func(model): + return 1 + + # To build an object t2, which will fail on deepcopy. + class T1(): + def __init__(self, t1) -> None: + self.t1 = t1 + self.j = 1 + + # required for usage with set in T1 + def __hash__(self): + return hash(self.j) + + t1 = set() + t2 = T1([t1]) + t1.add(t2) + + for fake_yaml in ['fx_ptq_yaml.yaml']: + model_origin = M() + model_origin.tmp = t2 + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.eval_func = eval_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin) + q_model = quantizer.fit() + self.assertTrue(isinstance(q_model.model, torch.fx.graph_module.GraphModule)) + + @unittest.skipIf(PT_VERSION < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") + def test_bf16_capability(self): + model_origin = DynamicControlModel() + os.environ['FORCE_BF16'] = '1' + q_capability = self.adaptor._get_quantizable_ops(model_origin) + del os.environ['FORCE_BF16'] + + self.assertEqual( + [elem['weight']['dtype'] for elem in q_capability['optypewise']['Conv2d']], + [['int8'], 'fp32']) + self.assertEqual( + [elem['activation']['dtype'] for elem in q_capability['optypewise']['Conv2d']], + [['uint8'], 'fp32']) + self.assertEqual( + [elem['weight']['dtype'] for elem in q_capability['opwise'][('conv', 'Conv2d')]], + [['int8'], 'fp32']) + self.assertEqual( + [elem['activation']['dtype'] for elem in q_capability['opwise'][('conv', 'Conv2d')]], + [['uint8'], 'fp32']) + self.assertEqual( + [elem['weight']['dtype'] for elem in q_capability['opwise'][('linear', 'Linear')]], + [['int8'], 'fp32', 'bf16']) + self.assertEqual( + [elem['activation']['dtype'] for elem in q_capability['opwise'][('linear', 'Linear')]], + [['uint8'], 'fp32', 'bf16']) + + @unittest.skipIf(PT_VERSION < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") + def test_mix_precision(self): + fake_yaml = 'fx_ptq_yaml.yaml' + model_origin = DynamicControlModel() + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.eval_func = eval_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin, + **{'prepare_custom_config_dict': \ + {'non_traceable_module_name': ['a']}, + 'convert_custom_config_dict': \ + {'preserved_attributes': []} + }) + q_model = quantizer.fit() + tune_cfg = q_model.q_config + tune_cfg['op'][('conv.module', 'Conv2d')].clear() + tune_cfg['op'][('conv.module', 'Conv2d')] = \ + {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}} + tune_cfg["bf16_ops_list"].append(('conv.module', 'Conv2d')) + from neural_compressor.adaptor.torch_utils.bf16_convert import Convert + q_model._model = Convert(q_model._model, tune_cfg) + + self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16) + self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16) + + def test_symbolic_trace(self): + from neural_compressor.adaptor.torch_utils.symbolic_trace import symbolic_trace + model_origin = DynamicControlModel() + traced_model = 
symbolic_trace(model_origin, is_qat=False) + if PT_VERSION >= Version("1.11.0").release: + self.assertTrue(isinstance(traced_model.sub, torch.nn.Module)) + self.assertTrue(isinstance(traced_model.conv, torch.fx.graph_module.GraphModule)) + else: + self.assertTrue(isinstance(traced_model.sub, torch.fx.graph_module.GraphModule)) + traced_model_qat = symbolic_trace(model_origin, is_qat=True) + self.assertTrue(isinstance(traced_model_qat.sub, torch.fx.graph_module.GraphModule)) + +if __name__ == "__main__": + unittest.main() diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py new file mode 100644 index 00000000000..3bea3e28673 --- /dev/null +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -0,0 +1,682 @@ +import copy +import neural_compressor.adaptor.pytorch as nc_torch +import numpy as np +import os +import shutil +import torch +import torch.nn as nn +import torch.nn.quantized as nnq +import unittest +import os +from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig +from neural_compressor.config import set_tensorboard, set_workspace +from neural_compressor.data import DATASETS, DATALOADERS +from neural_compressor.adaptor import FRAMEWORKS +from neural_compressor.model import MODELS +from neural_compressor.experimental import Quantization, common +from neural_compressor.experimental.data.datasets.dataset import DATASETS +from neural_compressor import quantization +from neural_compressor.training import prepare_compression +from neural_compressor.utils.pytorch import load +from neural_compressor.utils.utility import recover +from neural_compressor.utils.utility import LazyImport +from torch.quantization import QuantStub, DeQuantStub +from packaging.version import Version + + +# improve lazy import UT coverage +resnet18 = LazyImport("torchvision.models.resnet18") +q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") + +PT_VERSION = nc_torch.get_torch_version().release +if PT_VERSION >= Version("1.8.0").release: + FX_MODE = True +else: + FX_MODE = False + + +dyn_op_name_list = {"decoder": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}} + +ptq_op_name_list = { + "layer1.0.conv1": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer1.0.conv2": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer2.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["minmax"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer3.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["kl"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer1.0.add_relu": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, +} + +ptq_fx_op_name_list = { + "layer1.0.conv1": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer1.0.conv2": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer2.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["minmax"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + 
"algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer3.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["kl"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer1.0.add_relu": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "conv.module": { + "weight": { + "dtype": ["fp32"] + }, + "activation": { + "dtype": ["fp32"] + } + }, + "default_qconfig": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + } +} + +qat_op_name_list = { + "layer1.0.conv1": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer1.0.conv2": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + }, + "layer2.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["minmax"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer3.0.conv1": { + "activation": { + "dtype": ["uint8"], + "algorithm": ["kl"], + "granularity": ["per_tensor"], + "scheme": ["sym"] + }, + "weight": { + "dtype": ["int8"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], + "scheme": ["sym"] + } + }, + "layer1.0.add_relu": { + "activation": { + "dtype": ["fp32"] + }, + "weight": { + "dtype": ["fp32"] + } + } +} + + + + +class M(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.linear = nn.Linear(224 * 224, 5) + self.dequant = DeQuantStub() + + def forward(self, x): + x = self.quant(x) + x = self.conv(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + + +class FP32Model(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + times = x.size(1) + if times == 1: + return x + x + return x + + +class DynamicModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(1, 1, 1) + def forward(self, x): + if x is not None: + x = self.conv(x) + return x + + +class SubModel(torch.nn.Module): + def __init__(self, bypass=True): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(1, 1, 1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.relu = nn.ReLU() + self.fp32 = FP32Model() + self.norm = nn.LayerNorm([1, 224, 224]) + self.dequant = DeQuantStub() + self.bypass = bypass + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.quant(x) + x = self.relu(x) + x = self.conv1(x) + x = self.dequant(x) + if not self.bypass: + x = self.fp32(x) + x = self.norm(x) + return x + + +class PartialQuantModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn1 = nn.BatchNorm2d(1) + self.conv2 = nn.Conv2d(1, 1, 1) + self.linear = nn.Linear(224 * 224, 1) + self.dequant = DeQuantStub() + self.sub = SubModel(bypass=False) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.conv1(x) + x = self.bn1(x) + x = self.sub(x) + x = self.quant(x) + x = self.conv2(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + +class DynamicControlModel(torch.nn.Module): + def __init__(self): + 
super().__init__()
+ self.conv = nn.Conv2d(3, 1, 1)
+ self.bn = nn.BatchNorm2d(1)
+ self.linear = nn.Linear(224 * 224, 1)
+ self.sub = SubModel()
+ self.fp32 = FP32Model()
+ self.dyn = DynamicModel()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.dyn(x)
+ x = self.bn(x)
+ x = self.sub(x)
+ x = self.fp32(x)
+ x = x.view(1, -1)
+ x = self.linear(x)
+ return x
+
+
+class LSTMModel(nn.Module):
+ """Container module with an encoder, a recurrent module, and a decoder."""
+
+ def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5):
+ super(LSTMModel, self).__init__()
+ self.drop = nn.Dropout(dropout)
+ self.encoder = nn.Embedding(ntoken, ninp)
+ self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
+ self.decoder = nn.Linear(nhid, ntoken)
+ self.init_weights()
+ self.nhid = nhid
+ self.nlayers = nlayers
+
+ def init_weights(self):
+ initrange = 0.1
+ self.encoder.weight.data.uniform_(-initrange, initrange)
+ self.decoder.bias.data.zero_()
+ self.decoder.weight.data.uniform_(-initrange, initrange)
+
+ def forward(self, input):
+ input = torch.ones((3, 10), dtype=torch.int32)
+ h0 = torch.randn(2, 10, 256)
+ c0 = torch.randn(2, 10, 256)
+ hidden = (h0, c0)
+ emb = self.encoder(input)
+ output, hidden = self.rnn(emb, hidden)
+ output = self.drop(output)
+ decoded = self.decoder(output)
+ return decoded, hidden
+
+
+def eval_func(model):
+ # switch to evaluate mode
+ model.eval()
+ with torch.no_grad():
+ input = torch.randn(1, 3, 224, 224)
+ # compute output
+ output = model(input)
+ return 0.0
+
+
+def train_func(compression_manager, model, dataloader=None):
+ compression_manager.callbacks.on_train_begin(dataloader=dataloader)
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
+ # switch to train mode
+ model.train()
+ input = torch.randn(1, 3, 224, 224)
+ # compute output
+ output = model(input)
+ loss = output[0].mean() if isinstance(output, tuple) else output.mean()
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+ compression_manager.callbacks.on_train_end()
+ return model
+
+
+def q_func(model):
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
+ # switch to train mode
+ model.train()
+ input = torch.randn(1, 3, 224, 224)
+ # compute output
+ output = model(input)
+ loss = output.mean()
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+ return model
+
+
+class TestPytorchAdaptor(unittest.TestCase):
+ model = q_resnet18()
+
+ @classmethod
+ def tearDownClass(self):
+ shutil.rmtree("./saved", ignore_errors=True)
+ shutil.rmtree("runs", ignore_errors=True)
+
+ def test_quantization_new_API(self):
+ for fake_yaml in ["dynamic", "qat", "static"]:
+ model = M()
+ if fake_yaml == "qat":
+ quant_conf = QuantizationAwareTrainingConfig(op_name_list=qat_op_name_list)
+ compression_manager = prepare_compression(copy.deepcopy(model), quant_conf)
+ q_model = train_func(compression_manager, compression_manager.model)
+ else:
+ dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224))
+ dataloader = DATALOADERS["pytorch"](dataset)
+ if fake_yaml == "dynamic":
+ quant_conf = PostTrainingQuantConfig(approach="dynamic",
+ op_name_list=dyn_op_name_list)
+ elif fake_yaml == "static":
+ quant_conf = PostTrainingQuantConfig(approach="static",
+ op_name_list=ptq_op_name_list)
+ q_model = quantization.fit(
+ model,
+ quant_conf,
+ calib_dataloader=dataloader if fake_yaml == "static" else None)
+ q_model.save("./saved")
+ # Load configure and weights by neural_compressor.utils
+ saved_model = load("./saved", model)
+
shutil.rmtree("./saved", ignore_errors=True) + + def test_auto_quant(self): + def eval_func(model): + return 1 + + model_origin = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 2, + ) + # run fx_quant in neural_compressor and save the quantized GraphModule + quant_conf = PostTrainingQuantConfig(approach="auto") + set_workspace("./saved") + dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) + dataloader = common.DataLoader(dataset) + q_model = quantization.fit(model_origin, + quant_conf, + calib_dataloader=dataloader, + eval_func=eval_func) + q_model.save("./saved") + model = common.Model(model_origin) + model.workspace_path = "./saved" + self.assertNotEqual(q_model, None) + self.assertEqual(type(q_model._model.decoder), + type(model._model.decoder)) + shutil.rmtree("./saved", ignore_errors=True) + + def test_tensorboard(self): + model = copy.deepcopy(self.model) + model.eval().fuse_model() + quant_conf = PostTrainingQuantConfig(approach="static", + backend="pytorch") + set_tensorboard(True) + dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224)) + dataloader = common.DataLoader(dataset) + quantization.fit( + model, quant_conf, calib_dataloader=dataloader, eval_func=eval_func + ) + self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) + quantization.fit(model, + quant_conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader) + self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) + set_tensorboard(False) + + +@unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") +class TestPytorchFXAdaptor(unittest.TestCase): + @classmethod + def tearDownClass(self): + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + + def test_fx_quant(self): + for fake_yaml in ["qat", "static"]: + model_origin = resnet18() + dataset = DATASETS("pytorch")["dummy"]((10, 3, 224, 224), label=True) + dataloader = DATALOADERS["pytorch"](dataset) + if fake_yaml == "qat": + conf = QuantizationAwareTrainingConfig( + op_name_list=qat_op_name_list, backend="pytorch_fx" + ) + compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) + q_model = train_func(compression_manager, compression_manager.model, dataloader) + else: + conf = PostTrainingQuantConfig( + op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" + ) + set_workspace("./saved") + q_model = quantization.fit(model_origin, + conf, + calib_dataloader=dataloader, + calib_func=eval_func) + q_model.save("./saved") + # Load configure and weights with neural_compressor.utils + model_fx = load("./saved", model_origin, + **{"dataloader": torch.utils.data.DataLoader(dataset)}) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + if fake_yaml != "qat": + # recover int8 model with only tune_cfg + history_file = "./saved/history.snapshot" + model_fx_recover = recover(model_origin, history_file, 0, + **{"dataloader": dataloader}) + self.assertEqual(model_fx.code, model_fx_recover.code) + shutil.rmtree("./saved", ignore_errors=True) + for fake_yaml in ["qat", "static"]: + model_origin = M() + # run fx_quant in neural_compressor and save the quantized GraphModule + dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224), label=True) + dataloader = DATALOADERS["pytorch"](dataset) + if fake_yaml == "qat": + conf = QuantizationAwareTrainingConfig( + op_name_list=qat_op_name_list, backend="pytorch_fx" + ) + compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) + 
q_model = train_func(compression_manager, compression_manager.model, dataloader) + compression_manager.save("./saved") + else: + conf = PostTrainingQuantConfig( + op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" + ) + q_model = quantization.fit(model_origin, + conf, + calib_dataloader=dataloader) + q_model.save("./saved") + # Load configure and weights with neural_compressor.utils + model_fx = load("./saved", model_origin, + **{"dataloader": torch.utils.data.DataLoader(dataset)}) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + shutil.rmtree("./saved", ignore_errors=True) + + @unittest.skipIf(PT_VERSION < Version("1.9.0").release, + "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") + def test_fx_dynamic_quant(self): + origin_model = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 5, + ) + # run fx_quant in neural_compressor and save the quantized GraphModule + origin_model.eval() + conf = PostTrainingQuantConfig(approach="dynamic", + op_name_list=ptq_fx_op_name_list, backend="pytorch_fx" + ) + set_workspace("./saved") + q_model = quantization.fit(origin_model, conf) + q_model.save("./saved") + + # Load configure and weights by neural_compressor.utils + model_fx = load("./saved", origin_model) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # Test the functionality of older model saving type + state_dict = torch.load("./saved/best_model.pt") + tune_cfg = state_dict.pop("best_configure") + import yaml + with open("./saved/best_configure.yaml", "w") as f: + yaml.dump(tune_cfg, f, default_flow_style=False) + torch.save(state_dict, "./saved/best_model_weights.pt") + os.remove("./saved/best_model.pt") + model_fx = load("./saved", origin_model) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = "./saved/history.snapshot" + model_fx_recover = recover(origin_model, history_file, 0) + self.assertEqual(model_fx.code, model_fx_recover.code) + shutil.rmtree("./saved", ignore_errors=True) + + def test_default_dynamic_quant(self): + def eval_func(model): + return 1 + + # Model Definition + for fake_yaml in ["qat", "auto"]: + model_origin = LSTMModel( + ntoken = 10, + ninp = 512, + nhid = 256, + nlayers = 2, + ) + dataset = DATASETS("pytorch")["dummy"]((3, 10)) + dataloader = DATALOADERS["pytorch"](dataset) + # run fx_quant in neural_compressor and save the quantized GraphModule + if fake_yaml == "qat": + conf = QuantizationAwareTrainingConfig( + op_name_list=qat_op_name_list, backend="pytorch_fx" + ) + compression_manager = prepare_compression(copy.deepcopy(model_origin), conf) + q_model = train_func(compression_manager, compression_manager.model, dataloader=dataloader) + self.assertTrue("quantize" in str(type(q_model.model.encoder))) + self.assertTrue("quantize" in str(type(q_model.model.rnn))) + else: + conf = PostTrainingQuantConfig(backend="pytorch_fx") + q_model = quantization.fit(model_origin, + conf, + calib_dataloader=dataloader) + self.assertTrue("quantize" in str(type(q_model.model.encoder))) + self.assertTrue("quantize" in str(type(q_model.model.rnn))) + + def test_fx_sub_module_quant(self): + for fake_yaml in ["qat", "static"]: + model_origin = DynamicControlModel() + dataset = DATASETS("pytorch")["dummy"]((1, 3, 224, 224)) + dataloader = DATALOADERS["pytorch"](dataset) + # run fx_quant in neural_compressor and save the quantized GraphModule + if fake_yaml == "qat": + conf = 
QuantizationAwareTrainingConfig(
+ op_name_list=qat_op_name_list, backend="pytorch_fx"
+ )
+ compression_manager = prepare_compression(copy.deepcopy(model_origin), conf)
+ q_model = train_func(compression_manager, compression_manager.model, dataloader)
+ else:
+ set_workspace("./saved")
+ conf = PostTrainingQuantConfig(backend="pytorch_fx")
+ q_model = quantization.fit(model_origin,
+ conf,
+ calib_dataloader=dataloader)
+ q_model.save("./saved")
+ # Load configure and weights with neural_compressor.utils
+ model_fx = load("./saved/best_model.pt", model_origin,
+ **{"dataloader": torch.utils.data.DataLoader(dataset)
+ })
+ self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule))
+
+ if fake_yaml != "qat":
+ # recover int8 model with only tune_cfg
+ history_file = "./saved/history.snapshot"
+ model_fx_recover = recover(model_origin, history_file, 0,
+ **{"dataloader": torch.utils.data.DataLoader(dataset)
+ })
+ self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code)
+ shutil.rmtree("./saved", ignore_errors=True)
+
+ @unittest.skipIf(PT_VERSION < Version("1.11.0").release,
+ "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend")
+ def test_mix_precision(self):
+ model_origin = DynamicControlModel()
+ # run fx_quant in neural_compressor and save the quantized GraphModule
+ dataset = DATASETS("pytorch")["dummy"]((100, 3, 224, 224))
+ dataloader = DATALOADERS["pytorch"](dataset)
+ set_workspace("./saved")
+ conf = PostTrainingQuantConfig(op_name_list=ptq_fx_op_name_list, backend="pytorch_fx")
+ q_model = quantization.fit(model_origin,
+ conf,
+ calib_dataloader=dataloader,
+ calib_func=eval_func)
+ tune_cfg = q_model.q_config
+ tune_cfg["op"][("conv.module", "Conv2d")].clear()
+ tune_cfg["op"][("conv.module", "Conv2d")] = \
+ {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
+ tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d"))
+ from neural_compressor.adaptor.torch_utils.bf16_convert import Convert
+ q_model._model = Convert(q_model._model, tune_cfg)
+
+ self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
+ self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/adaptor/pytorch_adaptor/test_torch2onnx.py b/test/adaptor/pytorch_adaptor/test_torch2onnx.py
index 977e621be84..8977b1a1dd4 100644
--- a/test/adaptor/pytorch_adaptor/test_torch2onnx.py
+++ b/test/adaptor/pytorch_adaptor/test_torch2onnx.py
@@ -8,9 +8,8 @@ import unittest
 import neural_compressor.adaptor.pytorch as nc_torch
 from neural_compressor import quantization
-from neural_compressor.conf.pythonic_config import PostTrainingConfig, QuantizationAwareTrainingConfig
+from neural_compressor.config import PostTrainingQuantConfig
 from neural_compressor.experimental.data.datasets.dataset import DATASETS
-from neural_compressor.training import prepare_compression
 from packaging.version import Version
 from torch.quantization import QuantStub, DeQuantStub
@@ -209,11 +208,9 @@ def test_fx_quant(self):
 for fake_yaml in ['dynamic', 'static']:
 model = DynamicControlModel()
 # run fx_quant in neural_compressor and save the quantized GraphModule
- conf = PostTrainingConfig(
- approach="post_training_dynamic_quant" \
- if fake_yaml == "dynamic" else "post_training_static_quant",
- backend="pytorch_fx",
- performance_only=True
+ conf = PostTrainingQuantConfig(
+ approach=fake_yaml,
+ backend="pytorch_fx"
+ )
 dataset =
DATASETS("pytorch")['dummy']((100, 3, 224, 224)) dataloader = torch.utils.data.DataLoader(dataset) diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py index 7815bb6cbfe..f32e65525fa 100644 --- a/test/benchmark/test_benchmark.py +++ b/test/benchmark/test_benchmark.py @@ -269,4 +269,4 @@ def test_benchmark_with_custom_metric(self): os.system("rm *.log") if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py new file mode 100644 index 00000000000..fe5b0d0d710 --- /dev/null +++ b/test/benchmark/test_benchmark_2.x.py @@ -0,0 +1,176 @@ +"""Tests for neural_compressor benchmark""" +import psutil +import unittest +import os +import yaml +import numpy as np +import tensorflow as tf +import tempfile +import re +from neural_compressor.adaptor.tf_utils.util import write_graph + + +def build_benchmark(): + seq = ''' +from argparse import ArgumentParser +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') +args = arg_parser.parse_args() +from neural_compressor.benchmark import fit +from neural_compressor.config import BenchmarkConfig +from neural_compressor.data import DATASETS +from neural_compressor.experimental import common +dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +b_dataloader = common.DataLoader(dataset, batch_size=10) +conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) +fit(args.input_model, conf, b_dataloader=b_dataloader) + ''' + + seq1 = ''' +from argparse import ArgumentParser +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') +args = arg_parser.parse_args() +from neural_compressor.benchmark import fit +from neural_compressor.config import BenchmarkConfig +from neural_compressor.data import DATASETS +dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True) +from neural_compressor.experimental import common +conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) +b_dataloader = common.DataLoader(dataset, batch_size=10) +fit(args.input_model, conf, b_dataloader=b_dataloader) + ''' + + # test normal case + with open('fake.py', "w", encoding="utf-8") as f: + f.writelines(seq) + # test batchsize > len(dataset), use first batch + fake_data_5 = seq.replace('100, 32, 32, 1', '5, 32, 32, 1') + with open('fake_data_5.py', "w", encoding="utf-8") as f: + f.writelines(fake_data_5) + # test batchsize < len(dataset) < 2*batchsize, discard first batch + fake_data_15 = seq1.replace('100, 32, 32, 1', '15, 32, 32, 1') + with open('fake_data_15.py', "w", encoding="utf-8") as f: + f.writelines(fake_data_15) + # test 2*batchsize < len(dataset) < warmup*batchsize, discard last batch + fake_data_25 = seq1.replace('100, 32, 32, 1', '25, 32, 32, 1') + with open('fake_data_25.py', "w", encoding="utf-8") as f: + f.writelines(fake_data_25) + +def build_benchmark2(): + seq = [ + "from argparse import ArgumentParser\n", + "arg_parser = ArgumentParser(description='Parse args')\n", + "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", + "args = arg_parser.parse_args()\n", + "from neural_compressor.benchmark import fit\n" + "from neural_compressor.data import DATASETS\n", + "dataset = 
DATASETS('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", + + "from neural_compressor.experimental import common\n", + "b_dataloader = common.DataLoader(dataset)\n", + "fit(args.input_model, b_dataloader=b_dataloader)\n" + ] + + with open('fake2.py', "w", encoding="utf-8") as f: + f.writelines(seq) + + +def build_fake_model(): + graph_path = tempfile.mkstemp(suffix='.pb')[1] + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session(graph=graph) as sess: + x = tf.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') + conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], \ + padding='VALID', name='conv1') + op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], \ + padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + write_graph(graph_def, graph_path) + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session(graph=graph) as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') + conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], \ + padding='VALID', name='conv1') + op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], \ + padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + write_graph(graph_def, graph_path) + return graph_path + +class TestObjective(unittest.TestCase): + @classmethod + def setUpClass(self): + self.graph_path = build_fake_model() + build_benchmark() + build_benchmark2() + self.cpu_counts = psutil.cpu_count(logical=False) + + @classmethod + def tearDownClass(self): + if os.path.exists('fake.py'): + os.remove('fake.py') + if os.path.exists('fake2.py'): + os.remove('fake2.py') + if os.path.exists('fake_data_5.py'): + os.remove('fake_data_5.py') + if os.path.exists('fake_data_15.py'): + os.remove('fake_data_15.py') + if os.path.exists('fake_data_25.py'): + os.remove('fake_data_25.py') + + def test_benchmark(self): + os.system("python fake.py --input_model={}".format(self.graph_path)) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) + self.assertIsNotNone(throughput) + os.system("rm *.log") + + def test_benchmark_data_5(self): + os.system("python fake_data_5.py --input_model={}".format(self.graph_path)) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) + self.assertIsNotNone(throughput) + os.system("rm *.log") + + def test_benchmark_data_15(self): + os.system("python fake_data_15.py --input_model={}".format(self.graph_path)) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) 
images/sec", line) + self.assertIsNotNone(throughput) + os.system("rm *.log") + + def test_benchmark_data_25(self): + os.system("python fake_data_25.py --input_model={}".format(self.graph_path)) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) + self.assertIsNotNone(throughput) + os.system("rm *.log") + + +if __name__ == "__main__": + unittest.main() diff --git a/test/distillation/test_distillation.py b/test/distillation/test_distillation.py index 4d63baf5c00..a5a993f2fdf 100644 --- a/test/distillation/test_distillation.py +++ b/test/distillation/test_distillation.py @@ -7,7 +7,7 @@ import torch.nn as nn import tensorflow as tf from neural_compressor.data import DATASETS -from neural_compressor.conf.pythonic_config import DistillationConfig, KnowledgeDistillationLossConfig +from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader diff --git a/test/distillation/test_self_distillation.py b/test/distillation/test_self_distillation.py index 5bd29d37432..e05a40ae56e 100644 --- a/test/distillation/test_self_distillation.py +++ b/test/distillation/test_self_distillation.py @@ -5,7 +5,6 @@ import torch import torch.nn as nn import torchvision -from neural_compressor.conf.config import DistillationConf from neural_compressor.data import DATASETS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ PyTorchDataLoader @@ -82,8 +81,8 @@ def tearDownClass(cls): def test_self_distillation(self): import copy from neural_compressor.training import prepare_compression - from neural_compressor.conf.pythonic_config import DistillationConfig, \ - SelfKnowledgeDistillationLossConfig + from neural_compressor.config import DistillationConfig, \ + SelfKnowledgeDistillationLossConfig datasets = DATASETS("pytorch") dummy_dataset = datasets["dummy"]( diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index a5d5e09bfc9..a05a3e25e5c 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -10,7 +10,7 @@ from neural_compressor import mix_precision from neural_compressor.utils.utility import LazyImport, CpuInfo from neural_compressor.adaptor.torch_utils.bf16_convert import BF16ModuleWrapper -from neural_compressor.conf.pythonic_config import MixedPrecisionConfig, Options +from neural_compressor.config import MixedPrecisionConfig, set_workspace, TuningCriterion from onnx import helper, TensorProto from packaging.version import Version from tensorflow.core.framework import attr_value_pb2 @@ -262,26 +262,26 @@ def setUpClass(self): def test_on_non_enabled_host(self): # test onnx - conf = MixedPrecisionConfig(precisions=["fp16"], backend="onnxrt_qlinearops") + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.onnx_model, conf) self.assertEqual(cm.exception.code, 0) @unittest.skipIf(CpuInfo().bf16, 'skip since hardware support bf16') def test_on_non_enabled_host_tf(self): - conf = MixedPrecisionConfig(precisions=["bf16"], backend="tensorflow") + conf = MixedPrecisionConfig(extra_precisions=["bf16"], backend="tensorflow") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.tf_model, conf) self.assertEqual(cm.exception.code, 0) 
def test_on_non_enabled_dtype(self): # test onnx - conf = MixedPrecisionConfig(precisions=["bf16"], backend="onnxrt_qlinearops") + conf = MixedPrecisionConfig(extra_precisions=["bf16"], backend="onnxrt_qlinearops") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.onnx_model, conf) self.assertEqual(cm.exception.code, 0) - conf = MixedPrecisionConfig(precisions=["fp16"], backend="tensorflow") + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="tensorflow") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.tf_model, conf) self.assertEqual(cm.exception.code, 0) @@ -310,16 +310,16 @@ def test_mixed_precision_with_evaluation(self): from neural_compressor.experimental import common from neural_compressor.experimental.metric.metric import ONNXRT_QL_METRICS # test onnx - conf = MixedPrecisionConfig(precisions=["fp16"], + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops") - options = Options(workspace="./saved") - output_model = mix_precision.fit(self.onnx_model, conf, options=options) + set_workspace("./saved") + output_model = mix_precision.fit(self.onnx_model, conf) self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - conf = MixedPrecisionConfig(precisions=["fp16"], + tuning_criterion = TuningCriterion(max_trials=3, timeout=50) + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops", - max_trials=3, - timeout=50) + tuning_criterion=tuning_criterion) output_model = mix_precision.fit(self.onnx_model, conf, @@ -347,7 +347,7 @@ def eval2(model): from neural_compressor.experimental import MixedPrecision, common from neural_compressor import conf my_metric = Metric() - conf = MixedPrecisionConfig(precisions=["fp16"], + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops") output_model = mix_precision.fit(self.onnx_model, @@ -355,7 +355,7 @@ def eval2(model): eval_dataloader=common.DataLoader(self.matmul_dataset), eval_metric=my_metric) self.assertFalse(any([i.op_type == 'Cast' for i in output_model.nodes()])) - conf = MixedPrecisionConfig(precisions=["fp16"], + conf = MixedPrecisionConfig(extra_precisions=["fp16"], backend="onnxrt_qlinearops") output_model = mix_precision.fit(self.onnx_model, @@ -367,7 +367,7 @@ def eval2(model): conf = MixedPrecisionConfig( inputs="input", outputs="final", - precisions=["bf16", "fp32"], + extra_precisions=["bf16", "fp32"], ) output_model = mix_precision.fit( @@ -376,15 +376,15 @@ def eval2(model): eval_func=eval, ) self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) - self.assertEqual(conf.precisions, ['bf16', 'fp32']) + self.assertEqual(conf.extra_precisions, ['bf16', 'fp32']) self.assertEqual(conf.inputs, 'input') self.assertEqual(conf.outputs, 'final') + tuning_criterion = TuningCriterion(max_trials=4, timeout=500) conf = MixedPrecisionConfig( - max_trials=4, - timeout=500, - precisions=["bf16"], backend="tensorflow", + tuning_criterion=tuning_criterion, + extra_precisions=["bf16"], ) output_model = mix_precision.fit( common.Model(self.tf_model), @@ -393,12 +393,12 @@ def eval2(model): ) self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) + tuning_criterion = TuningCriterion(max_trials=1, timeout=100) conf = MixedPrecisionConfig( inputs="input", outputs="final, test", - max_trials=1, - timeout=100, - precisions=["bf16", "fp32"], + tuning_criterion=tuning_criterion, + extra_precisions=["bf16", "fp32"], ) output_model = 
mix_precision.fit( self.tf_model, @@ -414,7 +414,7 @@ def eval(model): return 0.5 conf = MixedPrecisionConfig( - precisions=["bf16"], + extra_precisions=["bf16"], backend="pytorch" ) output_model = mix_precision.fit( diff --git a/test/pruning/test_pruning.py b/test/pruning/test_pruning.py index b5b437639c0..3e1290e6bb7 100644 --- a/test/pruning/test_pruning.py +++ b/test/pruning/test_pruning.py @@ -6,7 +6,7 @@ import torchvision import torch.nn as nn -from neural_compressor.conf.pythonic_config import Pruner, PruningConfig +from neural_compressor.config import Pruner, PruningConfig from neural_compressor.data import DATASETS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.training import prepare_compression