diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py
index ad9efb470aa..4ee2f8e6fd6 100644
--- a/neural_compressor/experimental/benchmark.py
+++ b/neural_compressor/experimental/benchmark.py
@@ -179,12 +179,6 @@ def __call__(self, mode='performance'):
         """
         cfg = self.conf.usr_cfg
         assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...'
-        if self._b_func is None:
-            assert cfg.evaluation is not None, \
-                'You must pass b_func or benchmark evaluation filed should be set in config yaml file...'
-            # use first eval config in yaml if mode from __call__not same with yaml config
-            if not mode in cfg.evaluation:
-                mode = list(cfg.evaluation.keys())[0]
         assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...'
         set_all_env_var(deep_get(cfg, 'evaluation.{}.configs'.format(mode)))
         # disable multi-instance for accuracy mode
@@ -344,7 +338,6 @@ def run_instance(self, mode):
             b_dataloader_cfg = deep_get(cfg, 'evaluation.{}.dataloader'.format(mode))
             self._b_dataloader = create_dataloader(self.framework, b_dataloader_cfg)
 
-        is_measure = True
         if self._b_func is None:
             self._b_func = create_eval_func(self.framework, \
                                             self._b_dataloader, \
@@ -354,14 +347,13 @@ def run_instance(self, mode):
                                             iteration=iteration)
         else:
             self._custom_b_func = True
-            is_measure = False
 
         objectives = [i.lower() for i in cfg.tuning.multi_objectives.objective] if \
             deep_get(cfg, 'tuning.multi_objectives') else [cfg.tuning.objective]
         assert len(objectives) == 1, 'benchmark supports one objective at a time'
         self.objectives = MultiObjective(objectives,
                                          cfg.tuning.accuracy_criterion,
-                                         is_measure=is_measure)
+                                         is_measure=True)
 
         if self._custom_b_func:
             val = self.objectives.evaluate(self._b_func, self._model.model)
@@ -370,7 +362,8 @@ def run_instance(self, mode):
         # measurer contain info not only performance(eg, memory, model_size)
         # also measurer have result list among steps
         acc, _ = val
-        warmup = 0 if deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) is None \
+        batch_size = self._b_dataloader.batch_size
+        warmup = 0 if deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) is None \
             else deep_get(cfg, 'evaluation.{}.warmup'.format(mode))
 
         if len(self.objectives.objectives[0].result_list()) < warmup:
@@ -380,20 +373,19 @@ def run_instance(self, mode):
             warmup = 0
 
         result_list = self.objectives.objectives[0].result_list()[warmup:]
+        latency = np.array(result_list).mean() / batch_size
+        self._results[mode] = acc, batch_size, result_list
 
         logger.info("\n{} mode benchmark result:".format(mode))
         for i, res in enumerate(result_list):
             logger.debug("Iteration {} result {}:".format(i, res))
         if mode == 'accuracy':
-            self._results[mode] = acc, result_list
+            logger.info("Batch size = {}".format(batch_size))
             if isinstance(acc, list):
                 logger.info("Accuracy is" + "".join([" {:.4f}".format(i) for i in acc]))
             else:
                 logger.info("Accuracy is {:.4f}".format(acc))
         elif mode == 'performance':
-            batch_size = self._b_dataloader.batch_size
-            latency = np.array(result_list).mean() / batch_size
-            self._results[mode] = acc, batch_size, result_list
             logger.info("Batch size = {}".format(batch_size))
             logger.info("Latency: {:.3f} ms".format(latency * 1000))
             logger.info("Throughput: {:.3f} images/sec".format(1. / latency))
@@ -475,10 +467,9 @@ def model(self, user_model):
                        auto inferenced, but sometimes auto inferenced
                        inputs/outputs will not meet your requests,
                        so it is better to set them manually in config yaml file.
-                       Another corner case is slim model of tensorflow,
-                       be careful of the name of model configured in yaml file,
-                       make sure the name is in supported slim model list.
-
+                       Another corner case is the slim model of tensorflow,
+                       be careful of the name of the model configured in the yaml file,
+                       make sure the name is in the supported slim model list.
         """
         if not isinstance(user_model, BaseModel):
             logger.warning("Force convert framework model to neural_compressor model.")
@@ -525,7 +516,7 @@ def metric(self, user_metric):
         if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"):
             logger.warning("Override the value of `metric` field defined in yaml file" \
                            " as user defines the value of `metric` attribute by code.")
-
+
         if isinstance(user_metric, NCMetric):
             metric_cfg = {user_metric.name : {**user_metric.kwargs}}
             deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg)
@@ -570,4 +561,4 @@ def postprocess(self, user_postprocess):
 
     def __repr__(self):
         """Get the object representation in string format."""
-        return 'Benchmark'
\ No newline at end of file
+        return 'Benchmark'
diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py
index f32e65525fa..37aef1ca500 100644
--- a/test/benchmark/test_benchmark.py
+++ b/test/benchmark/test_benchmark.py
@@ -4,11 +4,13 @@
 import os
 import yaml
 import numpy as np
-import tensorflow as tf
 import tempfile
 import re
+import platform
 from neural_compressor.adaptor.tf_utils.util import write_graph
 
+import tensorflow as tf
+
 def build_fake_yaml():
     fake_yaml = '''
         model:
@@ -43,12 +45,14 @@ def build_benchmark():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
 from neural_compressor.data import DATASETS
-from neural_compressor.experimental import common
 dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)
-b_dataloader = common.DataLoader(dataset, batch_size=10)
-neural_compressor.benchmark(args.input_model, 'fake_yaml.yaml', b_dataloader=b_dataloader)
+from neural_compressor.experimental import Benchmark, common
+from neural_compressor.conf.config import BenchmarkConf
+benchmarker = Benchmark('fake_yaml.yaml')
+benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     seq1 = '''
@@ -56,14 +60,15 @@ def build_benchmark():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
 from neural_compressor.data import DATASETS
 dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)
-from neural_compressor.experimental import common
+from neural_compressor.experimental import Benchmark, common
 from neural_compressor.conf.config import BenchmarkConf
 conf = BenchmarkConf('fake_yaml.yaml')
-b_dataloader = common.DataLoader(dataset, batch_size=10)
-neural_compressor.benchmark(args.input_model, conf, b_dataloader=b_dataloader)
+benchmarker = Benchmark(conf)
+benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     # test normal case
@@ -88,13 +93,15 @@ def build_benchmark2():
         "arg_parser = ArgumentParser(description='Parse args')\n",
         "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n",
         "args = arg_parser.parse_args()\n",
-        "import neural_compressor\n"
+
         "from neural_compressor.data import DATASETS\n",
         "dataset = DATASETS('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n",
-        "from neural_compressor.experimental import common\n",
-        "b_dataloader = common.DataLoader(dataset)\n",
-        "neural_compressor.benchmark(args.input_model, b_dataloader=b_dataloader)\n"
+        "from neural_compressor.experimental import Benchmark, common\n",
+        "benchmarker = Benchmark()\n",
+        "benchmarker.model = args.input_model\n",
+        "benchmarker.b_dataloader = common.DataLoader(dataset)\n",
+        "benchmarker.fit()\n"
     ]
 
     seq1 = '''
@@ -102,11 +109,13 @@ def build_benchmark2():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
+
 from neural_compressor import conf
-from neural_compressor.experimental import common
+from neural_compressor.experimental import Benchmark, common
 conf.evaluation.performance.dataloader.dataset = {'dummy': {'shape': [100,32,32,1], 'label':True}}
-neural_compressor.benchmark(args.input_model, conf)
+benchmarker = Benchmark(conf)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     seq2 = '''
@@ -188,6 +197,7 @@ def setUpClass(self):
         build_benchmark()
         build_benchmark2()
         self.cpu_counts = psutil.cpu_count(logical=False)
+        self.platform = platform.system().lower()
 
     @classmethod
     def tearDownClass(self):
@@ -195,11 +205,11 @@ def tearDownClass(self):
         os.remove('fake_yaml.yaml')
         if os.path.exists('fake.py'):
             os.remove('fake.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake2.py'):
             os.remove('fake2.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake3.py'):
             os.remove('fake3.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake4.py'):
             os.remove('fake4.py')
         if os.path.exists('fake_data_5.py'):
             os.remove('fake_data_5.py')
@@ -248,8 +258,8 @@ def test_benchmark_without_yaml(self):
         os.system("python fake2.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
-                accuracy = re.search(r"Accuracy is\s+(\d+(\.\d+)?)", line)
-                self.assertIsNotNone(accuracy)
+                throughput = re.search(r"Throughput sum: (\d+(\.\d+)?)", line)
+                self.assertIsNotNone(throughput)
         os.system("rm *.log")
 
@@ -259,7 +269,7 @@ def test_benchmark_with_conf(self):
         os.system("python fake3.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
                 throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                 self.assertIsNotNone(throughput)
         os.system("rm *.log")
-        
+
@@ -267,6 +277,6 @@ def test_benchmark_with_custom_metric(self):
         os.system("python fake4.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
                 accuracy = re.search(r"Accuracy is\s+(\d+(\.\d+)?)", line)
                 self.assertIsNotNone(accuracy)
         os.system("rm *.log")
-        
+
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()
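
Note: the test changes above replace calls to neural_compressor.benchmark(...) with the experimental Benchmark class. For reference, a minimal sketch of that flow is shown below; it is an illustration assembled from the test fixtures, not part of the patch, and the yaml path, dummy dataset shape, and model path are placeholders.

# Sketch of the Benchmark usage the updated tests exercise (paths are placeholders).
from neural_compressor.data import DATASETS
from neural_compressor.experimental import Benchmark, common

# Dummy dataset with the same shape the test fixtures use.
dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)

benchmarker = Benchmark('fake_yaml.yaml')                             # yaml with an evaluation section (placeholder)
benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)  # optional: overrides the yaml dataloader
benchmarker.model = './frozen_graph.pb'                               # hypothetical frozen-graph path
benchmarker.fit()                                                     # defaults to mode='performance' per __call__

With is_measure now always True and latency/batch size computed before the mode branch, both accuracy and performance runs store (acc, batch_size, result_list) in self._results[mode].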