diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 83a0d9c1521..89b544fc184 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2397,6 +2397,8 @@ grappler amsgrad qoperator apis +PostTrainingQuantConfig +dgpu CPz Nsh UmK diff --git a/examples/.config/model_params_tensorflow.json b/examples/.config/model_params_tensorflow.json index db1e355e945..ed2866e420f 100644 --- a/examples/.config/model_params_tensorflow.json +++ b/examples/.config/model_params_tensorflow.json @@ -166,10 +166,8 @@ "model_src_dir": "image_recognition/keras_models/inception_resnet_v2/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/inception_resnet_v2_keras/saved_model/", - "yaml": "inception_resnet_v2.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "vgg16": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq", @@ -292,10 +290,8 @@ "model_src_dir": "image_recognition/keras_models/resnetv2_50/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model", - "yaml": "resnetv2_50.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 }, "resnetv2_101": { "model_src_dir": "image_recognition/tensorflow_models/quantization/ptq", @@ -2494,10 +2490,8 @@ "model_src_dir": "image_recognition/keras_models/xception/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/xception_keras/saved_model/", - "yaml": "xception.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 32 } } } diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md index 7598808aa4c..e04fb5645a4 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md @@ -26,7 +26,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for the latest Intel GPU driver installation. +For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
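(A quick sanity check, not part of this patch: after installing `intel-extension-for-tensorflow[gpu]` and the GPU driver, the Intel GPU is expected to surface as an `XPU` device through ITEX's pluggable-device registration; the device-type string below is an assumption worth verifying against your ITEX release.)

```python
import tensorflow as tf

# With intel-extension-for-tensorflow[gpu] active, an XPU entry should be listed here.
print(tf.config.list_physical_devices("XPU"))
```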
@@ -43,12 +44,20 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a inception_resnet_v2.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The inception_resnet_v2_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command ```shell - bash run_tuning.sh --config=inception_resnet_v2.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=inception_resnet_v2.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 ``` diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2.yaml b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2.yaml deleted file mode 100644 index 0fb00e3407a..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: inception_resnet_v2 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional.
required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2_itex.yaml b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2_itex.yaml deleted file mode 100644 index 85028bffccd..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/inception_resnet_v2_itex.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: inception_resnet_v2 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
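For reference, the settings from the deleted YAML files map onto the new `neural_compressor.config` API roughly as sketched below. `PostTrainingQuantConfig`, `device`, `backend`, and `calibration_sampling_size` all appear in this patch; the `AccuracyCriterion`/`TuningCriterion` names are an assumption about the 2.x config module and may vary by release.

```python
from neural_compressor.config import (
    AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion)

# Rough equivalent of the removed inception_resnet_v2_itex.yaml
config = PostTrainingQuantConfig(
    device="gpu",                          # was: device: gpu
    backend="itex",                        # was: framework: tensorflow_itex
    calibration_sampling_size=[50, 100],   # was: calibration.sampling_size: 50, 100
    accuracy_criterion=AccuracyCriterion(
        criterion="relative",
        tolerable_loss=0.01),              # was: accuracy_criterion.relative: 0.01
    tuning_criterion=TuningCriterion(timeout=0),  # was: exit_policy.timeout: 0
)
```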
diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py index 019806a9940..94761482429 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,11 +16,9 @@ # limitations under the License. # import time -import shutil import numpy as np -from argparse import ArgumentParser -from neural_compressor import data import tensorflow as tf +from neural_compressor.utils import logger tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) flags = tf.compat.v1.flags @@ -42,87 +40,111 @@ flags.DEFINE_bool( 'benchmark', False, 'whether to benchmark the model') -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - flags.DEFINE_string( 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + [BilinearImagenetTransform(height=299, width=299)])) + +eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. 
- - Returns: - accuracy (float): evaluation result, the larger is better. - """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=299, width=299)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to run inference and compute the specified metric on the validation dataset. + + Args: + model (tf.saved_model.load): the model object returned by tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys)[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + latency_list = [] + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + latency_list.append(end - start) + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1.
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py index 9923a076ef8..f7ea841eef2 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import argparse import tensorflow as tf def get_inception_resnet_v2_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh index ca49af56795..d464b019f8e 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh @@ -10,21 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) esac done @@ -35,10 +40,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md index 720bf7fcc42..27bf8ef5e3b 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md @@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation 
Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for the latest Intel GPU driver installation. +For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -42,12 +43,20 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a resnetv2_50.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The resnetv2_50_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command ```shell - bash run_tuning.sh --config=resnetv2_50.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=resnetv2_50.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 ``` diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/main.py index 23c1c00e096..6cab4ffed45 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/main.py @@ -16,11 +16,9 @@ # limitations under the License.
# import time -import shutil import numpy as np -from argparse import ArgumentParser -from neural_compressor import data import tensorflow as tf +from neural_compressor.utils import logger tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) flags = tf.compat.v1.flags @@ -42,87 +40,111 @@ flags.DEFINE_bool( 'benchmark', False, 'whether to benchmark the model') -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - flags.DEFINE_string( 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + [BilinearImagenetTransform(height=224, width=224)])) + +eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=224, width=224)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + measurer (object, optional): for benchmark measurement of duration. + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + latency_list = [] + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + latency_list.append(end - start) + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig( + calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py index cddc7949a2c..f8cd505f965 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import argparse import tensorflow as tf def get_resnet50_v2_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50.yaml b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50.yaml deleted file mode 100644 index e65e6efce3c..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnetv2_50 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50_itex.yaml b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50_itex.yaml deleted file mode 100644 index efbd1a2b95a..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/resnetv2_50_itex.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnetv2_50 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh index ca49af56795..d464b019f8e 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh @@ -10,21 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) esac done @@ -35,10 +40,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md index 32d90532981..27bf8ef5e3b 100644 --- 
a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md @@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for the latest Intel GPU driver installation. +For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. @@ -42,12 +43,20 @@ python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## Write Yaml config file -In examples directory, there is a xception.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The xception_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. +## Quantization Config +The Quantization Config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ## Run Command ```shell - bash run_tuning.sh --config=xception.yaml --input_model=./path/to/model --output_model=./result --eval_data=/path/to/evaluation/dataset --calib_data=/path/to/calibration/dataset - bash run_benchmark.sh --config=xception.yaml --input_model=./path/to/model --mode=performance --eval_data=/path/to/evaluation/dataset + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 ``` diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py index 0c427ed0d4e..a4720c85b5e 100644 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py @@ -16,11 +16,9 @@ # limitations under the License.
# import time -import shutil import numpy as np -from argparse import ArgumentParser -from neural_compressor import data import tensorflow as tf +from neural_compressor.utils import logger tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) flags = tf.compat.v1.flags @@ -42,87 +40,112 @@ flags.DEFINE_bool( 'benchmark', False, 'whether to benchmark the model') -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - flags.DEFINE_string( 'calib_data', None, 'location of calibration dataset') flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -from neural_compressor.experimental.metric.metric import TensorflowTopK -from neural_compressor.experimental.data.transforms.transform import ComposeTransform -from neural_compressor.experimental.data.datasets.dataset import TensorflowImageRecord -from neural_compressor.experimental.data.transforms.imagenet_transform import LabelShift -from neural_compressor.experimental.data.dataloaders.default_dataloader import DefaultDataLoader +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=32) + [BilinearImagenetTransform(height=299, width=299)])) + +eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + + if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model, measurer=None): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - if measurer: - measurer.start() - predictions = infer(input_tensor)[output_name] - if measurer: - measurer.end() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - return results - - results = eval_func(eval_dataloader, metric) - acc = metric.result() - return acc + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=299, width=299)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + measurer (object, optional): for benchmark measurement of duration. + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + latency_list = [] + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + latency_list.append(end - start) + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc def main(_): - if FLAGS.tune: - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - quantizer.model = common.Model(FLAGS.input_model) - quantizer.eval_func = evaluate - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - - - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - evaluator.model = common.Model(FLAGS.input_model) - evaluator.b_func = evaluate - evaluator.b_dataloader = eval_dataloader - evaluator(FLAGS.mode) + if FLAGS.tune: + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py index cc68ac4e6b4..6de15d47074 100644 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + import argparse import tensorflow as tf def get_xception_model(saved_path): diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh index ca49af56795..d464b019f8e 100644 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh @@ -10,21 +10,26 @@ function main { # init params function init_params { + batch_size=32 + iters=100 + for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --mode=*) mode=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) esac done @@ -35,10 +40,11 @@ function run_benchmark { python main.py \ --input_model ${input_model} \ - --config ${config} \ --benchmark \ --mode ${mode} \ - --eval_data ${eval_data} + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} } main "$@" diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh index 666154ca113..7e3ed727f71 100644 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh @@ -13,20 +13,14 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; - --eval_data=*) - eval_data=$(echo $var |cut -f2 -d=) - ;; - --calib_data=*) - calib_data=$(echo $var |cut -f2 -d=) + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) ;; esac done @@ -38,9 +32,8 @@ function run_tuning { python main.py \ --input_model ${input_model} \ --output_model ${output_model} \ - --config ${config} \ - --eval_data ${eval_data} \ - --calib_data ${calib_data} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ --tune } diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception.yaml b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception.yaml deleted file mode 100644 index 49eb1519cf8..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. 
- name: xception - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - op_wise: { - 'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception_itex.yaml b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception_itex.yaml deleted file mode 100644 index e0521d17216..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/xception_itex.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: xception - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - op_wise: { - 'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py index 2824a22f95c..c4e45ce5017 100644 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py +++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py @@ -8,7 +8,6 @@ from tensorflow.keras.models import Model from tensorflow.keras.datasets import cifar10 import numpy as np -import yaml def lr_schedule(epoch): @@ -178,29 +177,6 @@ def resnet_v2(input_shape, depth, num_classes=10): return model -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - - device: cpu - quantization: - approach: quant_aware_training - evaluation: - accuracy: - metric: - topk: 1 - tuning: - exit_policy: - performance_only: True - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: - yaml.dump(y, f) - f.close() - - # Training parameters batch_size = 32 # orig paper trained all networks with batch_size=128 epochs = 2 @@ -282,7 +258,7 @@ def train(): model.save("baseline_model") -def q_func(model): +def q_func(compression_manager, model): # Load the CIFAR10 data. (x_train, y_train), (x_test, y_test) = cifar10.load_data() @@ -332,8 +308,8 @@ def q_func(model): x_test, y_test, verbose=0) print('Quant test accuracy:', q_aware_model_accuracy) - q_aware_model.save("trained_qat_model") - return 'trained_qat_model' + + return q_aware_model class Dataset(object): def __init__(self, batch_size=500): @@ -365,11 +341,13 @@ def __getitem__(self, idx): return self.test_images[idx], self.test_labels[idx] if __name__ == '__main__': - build_fake_yaml() train() - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - quantizer.eval_dataloader = common.DataLoader(Dataset()) - quantizer.model = './baseline_model' - quantizer.q_func = q_func - quantizer.fit() + + from neural_compressor.training import prepare_compression + from neural_compressor.config import QuantizationAwareTrainingConfig + conf = QuantizationAwareTrainingConfig(backend="tensorflow") + compression_manager = prepare_compression('./baseline_model', conf) + compression_manager.callbacks.on_train_begin() + q_aware_model = q_func(compression_manager, compression_manager.model) + q_aware_model.save("trained_qat_model") + compression_manager.callbacks.on_train_end()
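For context, the compression-manager lifecycle that this patch adopts for QAT has the shape sketched below; it restates the calls from the diff above with placeholder training in between, and assumes only the names that appear in this patch.

```python
from neural_compressor.training import prepare_compression
from neural_compressor.config import QuantizationAwareTrainingConfig

conf = QuantizationAwareTrainingConfig(backend="tensorflow")
compression_manager = prepare_compression('./baseline_model', conf)

compression_manager.callbacks.on_train_begin()  # prepare the model for quantization-aware training
model = compression_manager.model               # wrapped model handed to q_func
# ... fine-tune `model` here, as q_func does with the CIFAR-10 data ...
compression_manager.callbacks.on_train_end()    # finalize the quantized model after training
```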