diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py
index ad9efb470aa..4ee2f8e6fd6 100644
--- a/neural_compressor/experimental/benchmark.py
+++ b/neural_compressor/experimental/benchmark.py
@@ -179,12 +179,6 @@ def __call__(self, mode='performance'):
         """
         cfg = self.conf.usr_cfg
         assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...'
-        if self._b_func is None:
-            assert cfg.evaluation is not None, \
-                'You must pass b_func or benchmark evaluation filed should be set in config yaml file...'
-            # use first eval config in yaml if mode from __call__not same with yaml config
-            if not mode in cfg.evaluation:
-                mode = list(cfg.evaluation.keys())[0]
         assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...'
         set_all_env_var(deep_get(cfg, 'evaluation.{}.configs'.format(mode)))
         # disable multi-instance for accuracy mode
@@ -344,7 +338,6 @@ def run_instance(self, mode):
             b_dataloader_cfg = deep_get(cfg, 'evaluation.{}.dataloader'.format(mode))
             self._b_dataloader = create_dataloader(self.framework, b_dataloader_cfg)
 
-        is_measure = True
         if self._b_func is None:
             self._b_func = create_eval_func(self.framework, \
                                             self._b_dataloader, \
@@ -354,14 +347,13 @@ def run_instance(self, mode):
                                             iteration=iteration)
         else:
             self._custom_b_func = True
-            is_measure = False
 
         objectives = [i.lower() for i in cfg.tuning.multi_objectives.objective] if \
             deep_get(cfg, 'tuning.multi_objectives') else [cfg.tuning.objective]
         assert len(objectives) == 1, 'benchmark supports one objective at a time'
         self.objectives = MultiObjective(objectives,
                                          cfg.tuning.accuracy_criterion,
-                                         is_measure=is_measure)
+                                         is_measure=True)
 
         if self._custom_b_func:
             val = self.objectives.evaluate(self._b_func, self._model.model)
@@ -370,7 +362,8 @@ def run_instance(self, mode):
         # measurer contain info not only performance(eg, memory, model_size)
         # also measurer have result list among steps
         acc, _ = val
-        warmup = 0 if deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) is None \
+        batch_size = self._b_dataloader.batch_size
+        warmup = 0 if deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) is None \
             else deep_get(cfg, 'evaluation.{}.warmup'.format(mode))
 
         if len(self.objectives.objectives[0].result_list()) < warmup:
@@ -380,20 +373,19 @@ def run_instance(self, mode):
             warmup = 0
 
         result_list = self.objectives.objectives[0].result_list()[warmup:]
+        latency = np.array(result_list).mean() / batch_size
+        self._results[mode] = acc, batch_size, result_list
 
         logger.info("\n{} mode benchmark result:".format(mode))
         for i, res in enumerate(result_list):
             logger.debug("Iteration {} result {}:".format(i, res))
         if mode == 'accuracy':
-            self._results[mode] = acc, result_list
+            logger.info("Batch size = {}".format(batch_size))
             if isinstance(acc, list):
                 logger.info("Accuracy is" + "".join([" {:.4f}".format(i) for i in acc]))
             else:
                 logger.info("Accuracy is {:.4f}".format(acc))
         elif mode == 'performance':
-            batch_size = self._b_dataloader.batch_size
-            latency = np.array(result_list).mean() / batch_size
-            self._results[mode] = acc, batch_size, result_list
             logger.info("Batch size = {}".format(batch_size))
             logger.info("Latency: {:.3f} ms".format(latency * 1000))
             logger.info("Throughput: {:.3f} images/sec".format(1. / latency))
@@ -475,10 +467,9 @@ def model(self, user_model):
                        auto inferenced, but sometimes auto inferenced
                        inputs/outputs will not meet your requests,
                        so it is better to set them manually in config yaml file.
-                       Another corner case is slim model of tensorflow,
-                       be careful of the name of model configured in yaml file,
-                       make sure the name is in supported slim model list.
-
+                       Another corner case is the slim model of tensorflow,
+                       be careful of the name of the model configured in the yaml file,
+                       make sure the name is in the supported slim model list.
         """
         if not isinstance(user_model, BaseModel):
             logger.warning("Force convert framework model to neural_compressor model.")
@@ -525,7 +516,7 @@ def metric(self, user_metric):
         if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"):
             logger.warning("Override the value of `metric` field defined in yaml file" \
                            " as user defines the value of `metric` attribute by code.")
-
+
         if isinstance(user_metric, NCMetric):
             metric_cfg = {user_metric.name : {**user_metric.kwargs}}
             deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg)
@@ -570,4 +561,4 @@ def postprocess(self, user_postprocess):
 
     def __repr__(self):
         """Get the object representation in string format."""
-        return 'Benchmark'
\ No newline at end of file
+        return 'Benchmark'
diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py
index f32e65525fa..37aef1ca500 100644
--- a/test/benchmark/test_benchmark.py
+++ b/test/benchmark/test_benchmark.py
@@ -4,11 +4,13 @@
 import os
 import yaml
 import numpy as np
-import tensorflow as tf
 import tempfile
 import re
+import platform
 from neural_compressor.adaptor.tf_utils.util import write_graph
 
+import tensorflow as tf
+
 def build_fake_yaml():
     fake_yaml = '''
         model:
@@ -43,12 +45,14 @@ def build_benchmark():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
 from neural_compressor.data import DATASETS
-from neural_compressor.experimental import common
 dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)
-b_dataloader = common.DataLoader(dataset, batch_size=10)
-neural_compressor.benchmark(args.input_model, 'fake_yaml.yaml', b_dataloader=b_dataloader)
+from neural_compressor.experimental import Benchmark, common
+from neural_compressor.conf.config import BenchmarkConf
+benchmarker = Benchmark('fake_yaml.yaml')
+benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     seq1 = '''
@@ -56,14 +60,15 @@ def build_benchmark():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
 from neural_compressor.data import DATASETS
 dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)
-from neural_compressor.experimental import common
+from neural_compressor.experimental import Benchmark, common
 from neural_compressor.conf.config import BenchmarkConf
 conf = BenchmarkConf('fake_yaml.yaml')
-b_dataloader = common.DataLoader(dataset, batch_size=10)
-neural_compressor.benchmark(args.input_model, conf, b_dataloader=b_dataloader)
+benchmarker = Benchmark(conf)
+benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     # test normal case
@@ -88,13 +93,15 @@ def build_benchmark2():
         "arg_parser = ArgumentParser(description='Parse args')\n",
         "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n",
         "args = arg_parser.parse_args()\n",
-        "import neural_compressor\n"
+
         "from neural_compressor.data import DATASETS\n",
         "dataset = DATASETS('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n",
-        "from neural_compressor.experimental import common\n",
-        "b_dataloader = common.DataLoader(dataset)\n",
-        "neural_compressor.benchmark(args.input_model, b_dataloader=b_dataloader)\n"
+        "from neural_compressor.experimental import Benchmark, common\n",
+        "benchmarker = Benchmark()\n",
+        "benchmarker.model = args.input_model\n",
+        "benchmarker.b_dataloader = common.DataLoader(dataset)\n",
+        "benchmarker.fit()\n"
     ]
 
     seq1 = '''
@@ -102,11 +109,13 @@ def build_benchmark2():
 arg_parser = ArgumentParser(description='Parse args')
 arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel')
 args = arg_parser.parse_args()
-import neural_compressor
+
 from neural_compressor import conf
-from neural_compressor.experimental import common
+from neural_compressor.experimental import Benchmark, common
 conf.evaluation.performance.dataloader.dataset = {'dummy': {'shape': [100,32,32,1], 'label':True}}
-neural_compressor.benchmark(args.input_model, conf)
+benchmarker = Benchmark(conf)
+benchmarker.model = args.input_model
+benchmarker.fit()
 '''
 
     seq2 = '''
@@ -188,6 +197,7 @@ def setUpClass(self):
         build_benchmark()
         build_benchmark2()
         self.cpu_counts = psutil.cpu_count(logical=False)
+        self.platform = platform.system().lower()
 
     @classmethod
     def tearDownClass(self):
@@ -195,11 +205,11 @@ def tearDownClass(self):
         os.remove('fake_yaml.yaml')
         if os.path.exists('fake.py'):
             os.remove('fake.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake2.py'):
             os.remove('fake2.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake3.py'):
             os.remove('fake3.py')
-        if os.path.exists('fake.py'):
+        if os.path.exists('fake4.py'):
             os.remove('fake4.py')
         if os.path.exists('fake_data_5.py'):
             os.remove('fake_data_5.py')
@@ -248,8 +258,8 @@ def test_benchmark_without_yaml(self):
         os.system("python fake2.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
-                accuracy = re.search(r"Accuracy is\s+(\d+(\.\d+)?)", line)
-                self.assertIsNotNone(accuracy)
+                throughput = re.search(r"Throughput sum: (\d+(\.\d+)?)", line)
+                self.assertIsNotNone(throughput)
         os.system("rm *.log")
 
@@ -259,7 +269,7 @@ def test_benchmark_with_conf(self):
         os.system("python fake3.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
                 throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                 self.assertIsNotNone(throughput)
         os.system("rm *.log")
-        
+
@@ -267,6 +277,6 @@ def test_benchmark_with_custom_metric(self):
         os.system("python fake4.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path))
         with open('benchmark.log', "r") as f:
             for line in f:
                 accuracy = re.search(r"Accuracy is\s+(\d+(\.\d+)?)", line)
                 self.assertIsNotNone(accuracy)
         os.system("rm *.log")
-        
+
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()
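
Note: the test changes above replace calls to neural_compressor.benchmark(...) with the experimental Benchmark class. For reference, a minimal sketch of that flow is shown below; it is an illustration assembled from the test fixtures, not part of the patch, and the yaml path, dummy dataset shape, and model path are placeholders.

# Sketch of the Benchmark usage the updated tests exercise (paths are placeholders).
from neural_compressor.data import DATASETS
from neural_compressor.experimental import Benchmark, common

# Dummy dataset with the same shape the test fixtures use.
dataset = DATASETS('tensorflow')['dummy']((100, 32, 32, 1), label=True)

benchmarker = Benchmark('fake_yaml.yaml')                             # yaml with an evaluation section (placeholder)
benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10)  # optional: overrides the yaml dataloader
benchmarker.model = './frozen_graph.pb'                               # hypothetical frozen-graph path
benchmarker.fit()                                                     # defaults to mode='performance' per __call__

With is_measure now always True and latency/batch size computed before the mode branch, both accuracy and performance runs store (acc, batch_size, result_list) in self._results[mode].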