From 99fa7a8205de7a3e0491e18c7ffcc40c8356c721 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 9 Mar 2022 20:10:00 +0800 Subject: [PATCH 1/9] add server cls, test=doc --- paddlespeech/cli/cls/infer.py | 3 +- paddlespeech/server/__init__.py | 1 + .../server/bin/paddlespeech_client.py | 70 +++++- paddlespeech/server/conf/application.yaml | 34 ++- paddlespeech/server/engine/cls/__init__.py | 13 + .../engine/cls/paddleinference/__init__.py | 13 + .../engine/cls/paddleinference/cls_engine.py | 225 ++++++++++++++++++ .../server/engine/cls/python/__init__.py | 13 + .../server/engine/cls/python/cls_engine.py | 124 ++++++++++ paddlespeech/server/engine/engine_factory.py | 6 + .../engine/tts/paddleinference/tts_engine.py | 73 +++--- paddlespeech/server/restful/api.py | 5 +- paddlespeech/server/restful/cls_api.py | 92 +++++++ paddlespeech/server/restful/request.py | 17 +- paddlespeech/server/restful/response.py | 43 +++- paddlespeech/server/utils/paddle_predictor.py | 7 +- 16 files changed, 687 insertions(+), 52 deletions(-) create mode 100644 paddlespeech/server/engine/cls/__init__.py create mode 100644 paddlespeech/server/engine/cls/paddleinference/__init__.py create mode 100644 paddlespeech/server/engine/cls/paddleinference/cls_engine.py create mode 100644 paddlespeech/server/engine/cls/python/__init__.py create mode 100644 paddlespeech/server/engine/cls/python/cls_engine.py create mode 100644 paddlespeech/server/restful/cls_api.py diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index ab5eee6e288..f56d8a579c5 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -193,7 +193,8 @@ def preprocess(self, audio_file: Union[str, os.PathLike]): sr=feat_conf['sample_rate'], mono=True, dtype='float32') - logger.info("Preprocessing audio_file:" + audio_file) + if isinstance(audio_file, (str, os.PathLike)): + logger.info("Preprocessing audio_file:" + audio_file) # Feature extraction feature_extractor = LogMelSpectrogram( diff --git a/paddlespeech/server/__init__.py b/paddlespeech/server/__init__.py index 384061ddae2..97722c0a0cb 100644 --- a/paddlespeech/server/__init__.py +++ b/paddlespeech/server/__init__.py @@ -18,6 +18,7 @@ from .base_commands import ServerBaseCommand from .base_commands import ServerHelpCommand from .bin.paddlespeech_client import ASRClientExecutor +from .bin.paddlespeech_client import CLSClientExecutor from .bin.paddlespeech_client import TTSClientExecutor from .bin.paddlespeech_server import ServerExecutor diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index ee6ab7ad764..939779b7465 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -31,7 +31,7 @@ from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 -__all__ = ['TTSClientExecutor', 'ASRClientExecutor'] +__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor'] @cli_client_register( @@ -243,3 +243,71 @@ def __call__(self, print("time cost %f s." 
% (time_end - time_start)) except BaseException: print("Failed to speech recognition.") + + +@cli_client_register( + name='paddlespeech_client.cls', description='visit cls service') +class CLSClientExecutor(BaseExecutor): + def __init__(self): + super(CLSClientExecutor, self).__init__() + self.parser = argparse.ArgumentParser( + prog='paddlespeech_client.cls', add_help=True) + self.parser.add_argument( + '--server_ip', type=str, default='127.0.0.1', help='server ip') + self.parser.add_argument( + '--port', type=int, default=8090, help='server port') + self.parser.add_argument( + '--input', + type=str, + default=None, + help='Audio file to classify.', + required=True) + self.parser.add_argument( + '--topk', + type=int, + default=1, + help='Return topk scores of classification result.') + + def execute(self, argv: List[str]) -> bool: + args = self.parser.parse_args(argv) + url = 'http://' + args.server_ip + ":" + str( + args.port) + '/paddlespeech/cls' + audio = wav2base64(args.input) + data = { + "audio": audio, + "topk": args.topk, + } + time_start = time.time() + try: + r = requests.post(url=url, data=json.dumps(data)) + # ending Timestamp + time_end = time.time() + logger.info(r.json()) + logger.info("Response time %f s." % (time_end - time_start)) + return True + except BaseException: + logger.error("Failed to speech classification.") + return False + + @stats_wrapper + def __call__(self, + input: str, + server_ip: str="127.0.0.1", + port: int=8090, + topk: int=1): + """ + Python API to call an executor. + """ + + url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/cls' + audio = wav2base64(input) + data = {"audio": audio, "topk": topk} + time_start = time.time() + try: + r = requests.post(url=url, data=json.dumps(data)) + # ending Timestamp + time_end = time.time() + print(r.json()) + print("Response time %f s." 
% (time_end - time_start)) + except BaseException: + print("Failed to speech classification.") diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 6048450b7ba..577315f4c39 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -9,12 +9,16 @@ port: 8090 # The task format in the engin_list is: _ # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] -engine_list: ['asr_python', 'tts_python'] +#engine_list: ['asr_python', 'tts_python', 'cls_python'] +engine_list: ['cls_inference'] +#engine_list: ['asr_python', 'cls_python'] ################################################################################# # ENGINE CONFIG # ################################################################################# + +################################### ASR ######################################### ################### speech task: asr; engine_type: python ####################### asr_python: model: 'conformer_wenetspeech' @@ -46,6 +50,7 @@ asr_inference: summary: True # False -> do not show predictor config +################################### TTS ######################################### ################### speech task: tts; engine_type: python ####################### tts_python: # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', @@ -105,3 +110,30 @@ tts_inference: # others lang: 'zh' + +################################### CLS ######################################### +################### speech task: cls; engine_type: python ####################### +cls_python: + # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model: 'panns_cnn14' + cfg_path: # [optional] Config of cls task. + ckpt_path: # [optional] Checkpoint file of model. + label_file: # [optional] Label file of cls task. + device: # set 'gpu:id' or 'cpu' + + +################### speech task: cls; engine_type: inference ####################### +cls_inference: + # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model_type: 'panns_cnn14' + cfg_path: + model_path: # the pdmodel file of am static model [optional] + params_path: # the pdiparams file of am static model [optional] + label_file: # [optional] Label file of cls task. + + predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + diff --git a/paddlespeech/server/engine/cls/__init__.py b/paddlespeech/server/engine/cls/__init__.py new file mode 100644 index 00000000000..97043fd7ba6 --- /dev/null +++ b/paddlespeech/server/engine/cls/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/paddlespeech/server/engine/cls/paddleinference/__init__.py b/paddlespeech/server/engine/cls/paddleinference/__init__.py new file mode 100644 index 00000000000..97043fd7ba6 --- /dev/null +++ b/paddlespeech/server/engine/cls/paddleinference/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddlespeech/server/engine/cls/paddleinference/cls_engine.py b/paddlespeech/server/engine/cls/paddleinference/cls_engine.py new file mode 100644 index 00000000000..9ccc1f78461 --- /dev/null +++ b/paddlespeech/server/engine/cls/paddleinference/cls_engine.py @@ -0,0 +1,225 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import io +import os +import time +from typing import Optional + +import numpy as np +import paddle +import yaml + +from paddlespeech.cli.cls.infer import CLSExecutor +from paddlespeech.cli.log import logger +from paddlespeech.cli.utils import download_and_decompress +from paddlespeech.cli.utils import MODEL_HOME +from paddlespeech.server.engine.base_engine import BaseEngine +from paddlespeech.server.utils.paddle_predictor import init_predictor +from paddlespeech.server.utils.paddle_predictor import run_model + +__all__ = ['CLSEngine'] + +pretrained_models = { + "panns_cnn6-32k": { + 'url': + 'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz', + 'md5': + 'da087c31046d23281d8ec5188c1967da', + 'cfg_path': + 'panns.yaml', + 'model_path': + 'inference.pdmodel', + 'params_path': + 'inference.pdiparams', + 'label_file': + 'audioset_labels.txt', + }, + "panns_cnn10-32k": { + 'url': + 'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz', + 'md5': + '5460cc6eafbfaf0f261cc75b90284ae1', + 'cfg_path': + 'panns.yaml', + 'model_path': + 'inference.pdmodel', + 'params_path': + 'inference.pdiparams', + 'label_file': + 'audioset_labels.txt', + }, + "panns_cnn14-32k": { + 'url': + 'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz', + 'md5': + 'ccc80b194821274da79466862b2ab00f', + 'cfg_path': + 'panns.yaml', + 'model_path': + 'inference.pdmodel', + 'params_path': + 'inference.pdiparams', + 'label_file': + 'audioset_labels.txt', + }, +} + + +class CLSServerExecutor(CLSExecutor): + def __init__(self): + super().__init__() + pass + + def _get_pretrained_path(self, tag: str) -> os.PathLike: + """ + Download and returns pretrained resources path of current task. + """ + support_models = list(pretrained_models.keys()) + assert tag in pretrained_models, 'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format( + tag, '\n\t\t'.join(support_models)) + + res_path = os.path.join(MODEL_HOME, tag) + decompressed_path = download_and_decompress(pretrained_models[tag], + res_path) + print("aaaaaaaaaaaaa: ", decompressed_path) + decompressed_path = os.path.abspath(decompressed_path) + logger.info( + 'Use pretrained model stored in: {}'.format(decompressed_path)) + + return decompressed_path + + def _init_from_path( + self, + model_type: str='panns_cnn14', + cfg_path: Optional[os.PathLike]=None, + model_path: Optional[os.PathLike]=None, + params_path: Optional[os.PathLike]=None, + label_file: Optional[os.PathLike]=None, + predictor_conf: dict=None, ): + """ + Init model and other resources from a specific path. 
+ """ + + if cfg_path is None or model_path is None or params_path is None or label_file is None: + tag = model_type + '-' + '32k' + self.res_path = self._get_pretrained_path(tag) + self.cfg_path = os.path.join(self.res_path, + pretrained_models[tag]['cfg_path']) + self.model_path = os.path.join(self.res_path, + pretrained_models[tag]['model_path']) + self.params_path = os.path.join( + self.res_path, pretrained_models[tag]['params_path']) + self.label_file = os.path.join(self.res_path, + pretrained_models[tag]['label_file']) + else: + self.cfg_path = os.path.abspath(cfg_path) + self.model_path = os.path.abspath(model_path) + self.params_path = os.path.abspath(params_path) + self.label_file = os.path.abspath(label_file) + + logger.info(self.cfg_path) + logger.info(self.model_path) + logger.info(self.params_path) + logger.info(self.label_file) + + # config + with open(self.cfg_path, 'r') as f: + self._conf = yaml.safe_load(f) + logger.info("Read cfg file successfully.") + + # labels + self._label_list = [] + with open(self.label_file, 'r') as f: + for line in f: + self._label_list.append(line.strip()) + logger.info("Read label file successfully.") + + # Create predictor + self.predictor_conf = predictor_conf + self.predictor = init_predictor( + model_file=self.model_path, + params_file=self.params_path, + predictor_conf=self.predictor_conf) + logger.info("Create predictor successfully.") + + @paddle.no_grad() + def infer(self): + """ + Model inference and result stored in self.output. + """ + output = run_model(self.predictor, [self._inputs['feats'].numpy()]) + self._outputs['logits'] = output[0] + + +class CLSEngine(BaseEngine): + """CLS server engine + + Args: + metaclass: Defaults to Singleton. + """ + + def __init__(self): + super(CLSEngine, self).__init__() + + def init(self, config: dict) -> bool: + """init engine resource + + Args: + config_file (str): config file + + Returns: + bool: init failed or success + """ + self.executor = CLSServerExecutor() + self.config = config + self.executor._init_from_path( + self.config.model_type, self.config.cfg_path, + self.config.model_path, self.config.params_path, + self.config.label_file, self.config.predictor_conf) + + logger.info("Initialize CLS server engine successfully.") + return True + + def run(self, audio_data): + """engine run + + Args: + audio_data (bytes): base64.b64decode + """ + + self.executor.preprocess(io.BytesIO(audio_data)) + st = time.time() + self.executor.infer() + infer_time = time.time() - st + + logger.info("inference time: {}".format(infer_time)) + logger.info("cls engine type: inference") + + def postprocess(self, topk: int): + """postprocess + """ + assert topk <= len(self.executor._label_list + ), 'Value of topk is larger than number of labels.' + + result = np.squeeze(self.executor._outputs['logits'], axis=0) + topk_idx = (-result).argsort()[:topk] + topk_results = [] + for idx in topk_idx: + res = {} + label, score = self.executor._label_list[idx], result[idx] + res['class_name'] = label + res['prob'] = score + topk_results.append(res) + + return topk_results diff --git a/paddlespeech/server/engine/cls/python/__init__.py b/paddlespeech/server/engine/cls/python/__init__.py new file mode 100644 index 00000000000..97043fd7ba6 --- /dev/null +++ b/paddlespeech/server/engine/cls/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddlespeech/server/engine/cls/python/cls_engine.py b/paddlespeech/server/engine/cls/python/cls_engine.py new file mode 100644 index 00000000000..1a975b0a05b --- /dev/null +++ b/paddlespeech/server/engine/cls/python/cls_engine.py @@ -0,0 +1,124 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import time +from typing import List + +import paddle + +from paddlespeech.cli.cls.infer import CLSExecutor +from paddlespeech.cli.log import logger +from paddlespeech.server.engine.base_engine import BaseEngine + +__all__ = ['CLSEngine'] + + +class CLSServerExecutor(CLSExecutor): + def __init__(self): + super().__init__() + pass + + def get_topk_results(self, topk: int) -> List: + assert topk <= len( + self._label_list), 'Value of topk is larger than number of labels.' + + result = self._outputs['logits'].squeeze(0).numpy() + topk_idx = (-result).argsort()[:topk] + res = {} + topk_results = [] + for idx in topk_idx: + label, score = self._label_list[idx], result[idx] + res['class'] = label + res['prob'] = score + topk_results.append(res) + return topk_results + + +class CLSEngine(BaseEngine): + """CLS server engine + + Args: + metaclass: Defaults to Singleton. + """ + + def __init__(self): + super(CLSEngine, self).__init__() + + def init(self, config: dict) -> bool: + """init engine resource + + Args: + config_file (str): config file + + Returns: + bool: init failed or success + """ + self.input = None + self.output = None + self.executor = CLSServerExecutor() + self.config = config + try: + if self.config.device: + self.device = self.config.device + else: + self.device = paddle.get_device() + paddle.set_device(self.device) + except BaseException: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) + + try: + self.executor._init_from_path( + self.config.model, self.config.cfg_path, self.config.ckpt_path, + self.config.label_file) + except BaseException: + logger.error("Initialize CLS server engine Failed.") + return False + + logger.info("Initialize CLS server engine successfully on device: %s." 
% + (self.device)) + return True + + def run(self, audio_data): + """engine run + + Args: + audio_data (bytes): base64.b64decode + """ + self.executor.preprocess(io.BytesIO(audio_data)) + st = time.time() + self.executor.infer() + infer_time = time.time() - st + + logger.info("inference time: {}".format(infer_time)) + logger.info("cls engine type: python") + + def postprocess(self, topk: int): + """postprocess + """ + assert topk <= len(self.executor._label_list + ), 'Value of topk is larger than number of labels.' + + result = self.executor._outputs['logits'].squeeze(0).numpy() + topk_idx = (-result).argsort()[:topk] + topk_results = [] + for idx in topk_idx: + res = {} + label, score = self.executor._label_list[idx], result[idx] + res['class_name'] = label + res['prob'] = score + topk_results.append(res) + + return topk_results diff --git a/paddlespeech/server/engine/engine_factory.py b/paddlespeech/server/engine/engine_factory.py index 546541edfcf..c39c44cae5f 100644 --- a/paddlespeech/server/engine/engine_factory.py +++ b/paddlespeech/server/engine/engine_factory.py @@ -31,5 +31,11 @@ def get_engine(engine_name: Text, engine_type: Text): elif engine_name == 'tts' and engine_type == 'python': from paddlespeech.server.engine.tts.python.tts_engine import TTSEngine return TTSEngine() + elif engine_name == 'cls' and engine_type == 'inference': + from paddlespeech.server.engine.cls.paddleinference.cls_engine import CLSEngine + return CLSEngine() + elif engine_name == 'cls' and engine_type == 'python': + from paddlespeech.server.engine.cls.python.cls_engine import CLSEngine + return CLSEngine() else: return None diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 1bbbe0ea3e1..6d23fb5be3f 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -250,27 +250,21 @@ def _init_from_path( self.frontend = English(phone_vocab_path=self.phones_dict) logger.info("frontend done!") - try: - # am predictor - self.am_predictor_conf = am_predictor_conf - self.am_predictor = init_predictor( - model_file=self.am_model, - params_file=self.am_params, - predictor_conf=self.am_predictor_conf) - logger.info("Create AM predictor successfully.") - except BaseException: - logger.error("Failed to create AM predictor.") - - try: - # voc predictor - self.voc_predictor_conf = voc_predictor_conf - self.voc_predictor = init_predictor( - model_file=self.voc_model, - params_file=self.voc_params, - predictor_conf=self.voc_predictor_conf) - logger.info("Create Vocoder predictor successfully.") - except BaseException: - logger.error("Failed to create Vocoder predictor.") + # Create am predictor + self.am_predictor_conf = am_predictor_conf + self.am_predictor = init_predictor( + model_file=self.am_model, + params_file=self.am_params, + predictor_conf=self.am_predictor_conf) + logger.info("Create AM predictor successfully.") + + # Create voc predictor + self.voc_predictor_conf = voc_predictor_conf + self.voc_predictor = init_predictor( + model_file=self.voc_model, + params_file=self.voc_params, + predictor_conf=self.voc_predictor_conf) + logger.info("Create Vocoder predictor successfully.") @paddle.no_grad() def infer(self, @@ -359,27 +353,22 @@ def __init__(self): def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() - try: - self.config = config - self.executor._init_from_path( - am=self.config.am, - am_model=self.config.am_model, - 
am_params=self.config.am_params, - am_sample_rate=self.config.am_sample_rate, - phones_dict=self.config.phones_dict, - tones_dict=self.config.tones_dict, - speaker_dict=self.config.speaker_dict, - voc=self.config.voc, - voc_model=self.config.voc_model, - voc_params=self.config.voc_params, - voc_sample_rate=self.config.voc_sample_rate, - lang=self.config.lang, - am_predictor_conf=self.config.am_predictor_conf, - voc_predictor_conf=self.config.voc_predictor_conf, ) - - except BaseException: - logger.error("Initialize TTS server engine Failed.") - return False + self.config = config + self.executor._init_from_path( + am=self.config.am, + am_model=self.config.am_model, + am_params=self.config.am_params, + am_sample_rate=self.config.am_sample_rate, + phones_dict=self.config.phones_dict, + tones_dict=self.config.tones_dict, + speaker_dict=self.config.speaker_dict, + voc=self.config.voc, + voc_model=self.config.voc_model, + voc_params=self.config.voc_params, + voc_sample_rate=self.config.voc_sample_rate, + lang=self.config.lang, + am_predictor_conf=self.config.am_predictor_conf, + voc_predictor_conf=self.config.voc_predictor_conf, ) logger.info("Initialize TTS server engine successfully.") return True diff --git a/paddlespeech/server/restful/api.py b/paddlespeech/server/restful/api.py index 2d69dee8739..3f91a03b647 100644 --- a/paddlespeech/server/restful/api.py +++ b/paddlespeech/server/restful/api.py @@ -16,6 +16,7 @@ from fastapi import APIRouter from paddlespeech.server.restful.asr_api import router as asr_router +from paddlespeech.server.restful.cls_api import router as cls_router from paddlespeech.server.restful.tts_api import router as tts_router _router = APIRouter() @@ -25,7 +26,7 @@ def setup_router(api_list: List): """setup router for fastapi Args: - api_list (List): [asr, tts] + api_list (List): [asr, tts, cls] Returns: APIRouter @@ -35,6 +36,8 @@ def setup_router(api_list: List): _router.include_router(asr_router) elif api_name == 'tts': _router.include_router(tts_router) + elif api_name == 'cls': + _router.include_router(cls_router) else: pass diff --git a/paddlespeech/server/restful/cls_api.py b/paddlespeech/server/restful/cls_api.py new file mode 100644 index 00000000000..306d9ca9c11 --- /dev/null +++ b/paddlespeech/server/restful/cls_api.py @@ -0,0 +1,92 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import base64 +import traceback +from typing import Union + +from fastapi import APIRouter + +from paddlespeech.server.engine.engine_pool import get_engine_pool +from paddlespeech.server.restful.request import CLSRequest +from paddlespeech.server.restful.response import CLSResponse +from paddlespeech.server.restful.response import ErrorResponse +from paddlespeech.server.utils.errors import ErrorCode +from paddlespeech.server.utils.errors import failed_response +from paddlespeech.server.utils.exception import ServerBaseException + +router = APIRouter() + + +@router.get('/paddlespeech/cls/help') +def help(): + """help + + Returns: + json: [description] + """ + response = { + "success": "True", + "code": 200, + "message": { + "global": "success" + }, + "result": { + "description": "cls server", + "input": "base64 string of wavfile", + "output": "classification result" + } + } + return response + + +@router.post( + "/paddlespeech/cls", response_model=Union[CLSResponse, ErrorResponse]) +def cls(request_body: CLSRequest): + """cls api + + Args: + request_body (CLSRequest): [description] + + Returns: + json: [description] + """ + try: + audio_data = base64.b64decode(request_body.audio) + + # get single engine from engine pool + engine_pool = get_engine_pool() + cls_engine = engine_pool['cls'] + + cls_engine.run(audio_data) + cls_results = cls_engine.postprocess(request_body.topk) + + response = { + "success": True, + "code": 200, + "message": { + "description": "success" + }, + "result": { + "topk": request_body.topk, + "results": cls_results + } + } + + except ServerBaseException as e: + response = failed_response(e.error_code, e.msg) + except BaseException: + response = failed_response(ErrorCode.SERVER_UNKOWN_ERR) + traceback.print_exc() + + return response diff --git a/paddlespeech/server/restful/request.py b/paddlespeech/server/restful/request.py index 28908801977..dbac9dac881 100644 --- a/paddlespeech/server/restful/request.py +++ b/paddlespeech/server/restful/request.py @@ -15,7 +15,7 @@ from pydantic import BaseModel -__all__ = ['ASRRequest', 'TTSRequest'] +__all__ = ['ASRRequest', 'TTSRequest', 'CLSRequest'] #****************************************************************************************/ @@ -63,3 +63,18 @@ class TTSRequest(BaseModel): volume: float = 1.0 sample_rate: int = 0 save_path: str = None + + +#****************************************************************************************/ +#************************************ CLS request ***************************************/ +#****************************************************************************************/ +class CLSRequest(BaseModel): + """ + request body example + { + "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...", + "topk": 1 + } + """ + audio: str + topk: int = 1 diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index 4e18ee0d790..9b647a63c49 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from pydantic import BaseModel -__all__ = ['ASRResponse', 'TTSResponse'] +__all__ = ['ASRResponse', 'TTSResponse', 'CLSResponse'] class Message(BaseModel): @@ -85,6 +87,45 @@ class TTSResponse(BaseModel): result: TTSResult +#****************************************************************************************/ +#************************************ CLS response **************************************/ +#****************************************************************************************/ +class CLSResults(BaseModel): + class_name: str + prob: float + + +class CLSResult(BaseModel): + topk: int + results: List[CLSResults] + + +class CLSResponse(BaseModel): + """ + response example + { + "success": true, + "code": 0, + "message": { + "description": "success" + }, + "result": { + topk: 1 + results: [ + { + "class":"Speech", + "prob": 0.9027184844017029 + } + ] + } + } + """ + success: bool + code: int + message: Message + result: CLSResult + + #****************************************************************************************/ #********************************** Error response **************************************/ #****************************************************************************************/ diff --git a/paddlespeech/server/utils/paddle_predictor.py b/paddlespeech/server/utils/paddle_predictor.py index 4035d48d8c9..16653cf372e 100644 --- a/paddlespeech/server/utils/paddle_predictor.py +++ b/paddlespeech/server/utils/paddle_predictor.py @@ -35,10 +35,12 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, Returns: predictor (PaddleInferPredictor): created predictor """ - if model_dir is not None: + assert os.path.isdir(model_dir), 'Please check model dir.' config = Config(args.model_dir) else: + assert os.path.isfile(model_file) and os.path.isfile( + params_file), 'Please check model and parameter files.' 
config = Config(model_file, params_file) # set device @@ -66,7 +68,6 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, config.enable_memory_optim() predictor = create_predictor(config) - return predictor @@ -84,10 +85,8 @@ def run_model(predictor, input: List) -> List: for i, name in enumerate(input_names): input_handle = predictor.get_input_handle(name) input_handle.copy_from_cpu(input[i]) - # do the inference predictor.run() - results = [] # get out data from output tensor output_names = predictor.get_output_names() From 0a6602c7080bad613c771eac604c52cb024b9c84 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 9 Mar 2022 20:14:27 +0800 Subject: [PATCH 2/9] modify application.yaml, test=doc --- demos/speech_server/conf/application.yaml | 32 ++++++++++++++++++++++- paddlespeech/server/conf/application.yaml | 4 +-- tests/unit/server/conf/application.yaml | 32 ++++++++++++++++++++++- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index 6048450b7ba..2b1a0599808 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -9,12 +9,14 @@ port: 8090 # The task format in the engin_list is: _ # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] -engine_list: ['asr_python', 'tts_python'] +engine_list: ['asr_python', 'tts_python', 'cls_python'] ################################################################################# # ENGINE CONFIG # ################################################################################# + +################################### ASR ######################################### ################### speech task: asr; engine_type: python ####################### asr_python: model: 'conformer_wenetspeech' @@ -46,6 +48,7 @@ asr_inference: summary: True # False -> do not show predictor config +################################### TTS ######################################### ################### speech task: tts; engine_type: python ####################### tts_python: # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', @@ -105,3 +108,30 @@ tts_inference: # others lang: 'zh' + +################################### CLS ######################################### +################### speech task: cls; engine_type: python ####################### +cls_python: + # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model: 'panns_cnn14' + cfg_path: # [optional] Config of cls task. + ckpt_path: # [optional] Checkpoint file of model. + label_file: # [optional] Label file of cls task. + device: # set 'gpu:id' or 'cpu' + + +################### speech task: cls; engine_type: inference ####################### +cls_inference: + # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model_type: 'panns_cnn14' + cfg_path: + model_path: # the pdmodel file of am static model [optional] + params_path: # the pdiparams file of am static model [optional] + label_file: # [optional] Label file of cls task. 
+ + predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 577315f4c39..2b1a0599808 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -9,9 +9,7 @@ port: 8090 # The task format in the engin_list is: _ # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] -#engine_list: ['asr_python', 'tts_python', 'cls_python'] -engine_list: ['cls_inference'] -#engine_list: ['asr_python', 'cls_python'] +engine_list: ['asr_python', 'tts_python', 'cls_python'] ################################################################################# diff --git a/tests/unit/server/conf/application.yaml b/tests/unit/server/conf/application.yaml index 6048450b7ba..2b1a0599808 100644 --- a/tests/unit/server/conf/application.yaml +++ b/tests/unit/server/conf/application.yaml @@ -9,12 +9,14 @@ port: 8090 # The task format in the engin_list is: _ # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] -engine_list: ['asr_python', 'tts_python'] +engine_list: ['asr_python', 'tts_python', 'cls_python'] ################################################################################# # ENGINE CONFIG # ################################################################################# + +################################### ASR ######################################### ################### speech task: asr; engine_type: python ####################### asr_python: model: 'conformer_wenetspeech' @@ -46,6 +48,7 @@ asr_inference: summary: True # False -> do not show predictor config +################################### TTS ######################################### ################### speech task: tts; engine_type: python ####################### tts_python: # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', @@ -105,3 +108,30 @@ tts_inference: # others lang: 'zh' + +################################### CLS ######################################### +################### speech task: cls; engine_type: python ####################### +cls_python: + # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model: 'panns_cnn14' + cfg_path: # [optional] Config of cls task. + ckpt_path: # [optional] Checkpoint file of model. + label_file: # [optional] Label file of cls task. + device: # set 'gpu:id' or 'cpu' + + +################### speech task: cls; engine_type: inference ####################### +cls_inference: + # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6'] + model_type: 'panns_cnn14' + cfg_path: + model_path: # the pdmodel file of am static model [optional] + params_path: # the pdiparams file of am static model [optional] + label_file: # [optional] Label file of cls task. 
+ + predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + From 89bc3ba4fa6dcf368eea664b1c089e6fa78e36a2 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 9 Mar 2022 20:36:55 +0800 Subject: [PATCH 3/9] modify tests, test=doc --- tests/unit/server/change_yaml.py | 10 +++++- tests/unit/server/test_server_client.sh | 41 +++++++++++++++---------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/tests/unit/server/change_yaml.py b/tests/unit/server/change_yaml.py index 1f063d8f509..cdeaebdbcf3 100644 --- a/tests/unit/server/change_yaml.py +++ b/tests/unit/server/change_yaml.py @@ -25,13 +25,15 @@ def change_device(yamlfile: str, engine: str, device: str): with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: y = yaml.safe_load(f) - if engine == 'asr_python' or engine == 'tts_python': + if engine == 'asr_python' or engine == 'tts_python' or engine == 'cls_python': y[engine]['device'] = set_device elif engine == 'asr_inference': y[engine]['am_predictor_conf']['device'] = set_device elif engine == 'tts_inference': y[engine]['am_predictor_conf']['device'] = set_device y[engine]['voc_predictor_conf']['device'] = set_device + elif engine == 'cls_inference': + y[engine]['predictor_conf']['device'] = set_device else: print( "Please set correct engine: asr_python, tts_python, asr_inference, tts_inference." @@ -84,6 +86,8 @@ def change_engine_type(yamlfile: str, engine_type): 'enginetype-asr_inference', 'enginetype-tts_python', 'enginetype-tts_inference', + 'enginetype-cls_python', + 'enginetype-cls_inference', 'device-asr_python-cpu', 'device-asr_python-gpu', 'device-asr_inference-cpu', @@ -92,6 +96,10 @@ def change_engine_type(yamlfile: str, engine_type): 'device-tts_python-gpu', 'device-tts_inference-cpu', 'device-tts_inference-gpu', + 'device-cls_python-cpu', + 'device-cls_python-gpu', + 'device-cls_inference-cpu', + 'device-cls_inference-gpu', ], required=True) args = parser.parse_args() diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh index b48e7111b87..e7ae7604d17 100644 --- a/tests/unit/server/test_server_client.sh +++ b/tests/unit/server/test_server_client.sh @@ -33,15 +33,21 @@ ClientTest(){ ((test_times+=1)) paddlespeech_client tts --server_ip $server_ip --port $port --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav ((test_times+=1)) + + # test cls client + paddlespeech_client cls --server_ip $server_ip --port $port --input ./zh.wav + ((test_times+=1)) + paddlespeech_client cls --server_ip $server_ip --port $port --input ./zh.wav + ((test_times+=1)) } GetTestResult() { # Determine if the test was successful response_success_time=$(cat log/server.log | grep "200 OK" -c) if (( $response_success_time == $test_times )) ; then - echo "Testing successfully. The service configuration is: asr engine type: $1; tts engine type: $1; device: $2." | tee -a ./log/test_result.log + echo "Testing successfully. The service configuration is: asr engine type: $1; tts engine type: $1; cls engine type: $1; device: $2." | tee -a ./log/test_result.log else - echo "Testing failed. The service configuration is: asr engine type: $1; tts engine type: $1; device: $2." | tee -a ./log/test_result.log + echo "Testing failed. The service configuration is: asr engine type: $1; tts engine type: $1; cls engine type: $1; device: $2." 
| tee -a ./log/test_result.log fi test_times=$response_success_time } @@ -74,8 +80,8 @@ target_start_num=0 # the number of start service test_times=0 # The number of client test error_time=0 # The number of error occurrences in the startup failure server.log.wf file -# start server: asr engine type: python; tts engine type: python; device: gpu -echo "Start the service: asr engine type: python; tts engine type: python; device: gpu" | tee -a ./log/test_result.log +# start server: asr engine type: python; tts engine type: python; cls engine type: python; device: gpu +echo "Start the service: asr engine type: python; tts engine type: python; cls engine type: python; device: gpu" | tee -a ./log/test_result.log ((target_start_num+=1)) StartService @@ -98,11 +104,12 @@ echo "************************************************************************** -# start server: asr engine type: python; tts engine type: python; device: cpu -python change_yaml.py --change_task device-asr_python-cpu # change asr.yaml device: cpu -python change_yaml.py --change_task device-tts_python-cpu # change tts.yaml device: cpu +# start server: asr engine type: python; tts engine type: python; cls engine type: python; device: cpu +python change_yaml.py --change_task device-asr_python-cpu # change asr_python device: cpu +python change_yaml.py --change_task device-tts_python-cpu # change tts_python device: cpu +python change_yaml.py --change_task device-cls_python-cpu # change cls_python device: cpu -echo "Start the service: asr engine type: python; tts engine type: python; device: cpu" | tee -a ./log/test_result.log +echo "Start the service: asr engine type: python; tts engine type: python; cls engine type: python; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) StartService @@ -124,11 +131,12 @@ sleep 2s echo "**************************************************************************************" | tee -a ./log/test_result.log -# start server: asr engine type: inference; tts engine type: inference; device: gpu -python change_yaml.py --change_task enginetype-asr_inference # change application.yaml, asr engine_type: inference; asr engine_backend: asr_pd.yaml -python change_yaml.py --change_task enginetype-tts_inference # change application.yaml, tts engine_type: inference; tts engine_backend: tts_pd.yaml +# start server: asr engine type: inference; tts engine type: inference; cls engine type: inference; device: gpu +python change_yaml.py --change_task enginetype-asr_inference # change engine_list: 'asr_python' -> 'asr_inference' +python change_yaml.py --change_task enginetype-tts_inference # change engine_list: 'tts_python' -> 'tts_inference' +python change_yaml.py --change_task enginetype-cls_inference # change engine_list: 'cls_python' -> 'cls_inference' -echo "Start the service: asr engine type: inference; tts engine type: inference; device: gpu" | tee -a ./log/test_result.log +echo "Start the service: asr engine type: inference; tts engine type: inference; cls engine type: inference; device: gpu" | tee -a ./log/test_result.log ((target_start_num+=1)) StartService @@ -150,11 +158,12 @@ sleep 2s echo "**************************************************************************************" | tee -a ./log/test_result.log -# start server: asr engine type: inference; tts engine type: inference; device: cpu -python change_yaml.py --change_task device-asr_inference-cpu # change asr_pd.yaml device: cpu -python change_yaml.py --change_task device-tts_inference-cpu # change tts_pd.yaml device: cpu +# start server: 
asr engine type: inference; tts engine type: inference; cls engine type: inference; device: cpu +python change_yaml.py --change_task device-asr_inference-cpu # change asr_inference device: cpu +python change_yaml.py --change_task device-tts_inference-cpu # change tts_inference device: cpu +python change_yaml.py --change_task device-cls_inference-cpu # change cls_inference device: cpu -echo "start the service: asr engine type: inference; tts engine type: inference; device: cpu" | tee -a ./log/test_result.log +echo "start the service: asr engine type: inference; tts engine type: inference; cls engine type: inference; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) StartService From 5187df847f0d7f26ebcf90ef22eaa0101a4eae76 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 9 Mar 2022 21:02:55 +0800 Subject: [PATCH 4/9] modify server demo, test=doc --- demos/speech_server/README.md | 49 +++++++++++++++ demos/speech_server/README_cn.md | 54 +++++++++++++++- .../server/bin/paddlespeech_server.py | 62 +++++-------------- 3 files changed, 118 insertions(+), 47 deletions(-) diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index a2f6f221320..46d8f7c22cd 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -182,6 +182,52 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` +### 6. CLS Client Usage +**Note:** The response time will be slightly longer when using the client for the first time +- Command Line (Recommended) + ``` + paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + ``` + + Usage: + + ```bash + paddlespeech_client cls --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Audio file to be classified. + - `topk`: topk scores of classification result. + + Output: + ```bash + [2022-03-09 20:44:39,974] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + [2022-03-09 20:44:39,975] [ INFO] - Response time 0.104360 s. + + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor + + clsclient_executor = CLSClientExecutor() + clsclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + topk=1) + + ``` + + Output: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + Response time 0.150897 s. + + ``` + ## Models supported by the service ### ASR model @@ -189,3 +235,6 @@ Get all models supported by the ASR service via `paddlespeech_server stats --tas ### TTS model Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for paddle inference inference. + +### CLS model +Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for paddle inference inference. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 762248a117f..1328ca279c9 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -80,7 +80,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` -### 4. ASR客户端使用方法 +### 4. 
ASR 客户端使用方法 **注意:** 初次使用客户端时响应时间会略长 - 命令行 (推荐使用) ``` @@ -129,7 +129,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` -### 5. TTS客户端使用方法 +### 5. TTS 客户端使用方法 **注意:** 初次使用客户端时响应时间会略长 - 命令行 (推荐使用) @@ -185,9 +185,59 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` + ### 5. CLS 客户端使用方法 + **注意:** 初次使用客户端时响应时间会略长 + - 命令行 (推荐使用) + ``` + paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + ``` + + 使用帮助: + + ```bash + paddlespeech_client cls --help + ``` + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 用于分类的音频文件。 + - `topk`: 分类结果的topk。 + + 输出: + ```bash + [2022-03-09 20:44:39,974] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + [2022-03-09 20:44:39,975] [ INFO] - Response time 0.104360 s. + + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor + + clsclient_executor = CLSClientExecutor() + clsclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + topk=1) + + ``` + + 输出: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + Response time 0.150897 s. + + ``` + + ## 服务支持的模型 ### ASR支持的模型 通过 `paddlespeech_server stats --task asr` 获取ASR服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 ### TTS支持的模型 通过 `paddlespeech_server stats --task tts` 获取TTS服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 + +### CLS支持的模型 +通过 `paddlespeech_server stats --task cls` 获取CLS服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 3d71f091b3d..7e7f03b2d76 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -103,13 +103,14 @@ def __init__(self): '--task', type=str, default=None, - choices=['asr', 'tts'], + choices=['asr', 'tts', 'cls'], help='Choose speech task.', required=True) - self.task_choices = ['asr', 'tts'] + self.task_choices = ['asr', 'tts', 'cls'] self.model_name_format = { 'asr': 'Model-Language-Sample Rate', - 'tts': 'Model-Language' + 'tts': 'Model-Language', + 'cls': 'Model-Sample Rate' } def show_support_models(self, pretrained_models: dict): @@ -173,54 +174,25 @@ def execute(self, argv: List[str]) -> bool: "Failed to get the table of TTS pretrained models supported in the service." ) return False - - @stats_wrapper - def __call__( - self, - task: str=None, ): - """ - Python API to call an executor. - """ - self.task = task - if self.task not in self.task_choices: - print("Please input correct speech task, choices = ['asr', 'tts']") - - elif self.task == 'asr': + + elif self.task == 'cls': try: - from paddlespeech.cli.asr.infer import pretrained_models - print( - "Here is the table of ASR pretrained models supported in the service." - ) - self.show_support_models(pretrained_models) - - # show ASR static pretrained model - from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models - print( - "Here is the table of ASR static pretrained models supported in the service." - ) - self.show_support_models(pretrained_models) - - except BaseException: - print( - "Failed to get the table of ASR pretrained models supported in the service." 
- ) - - elif self.task == 'tts': - try: - from paddlespeech.cli.tts.infer import pretrained_models - print( - "Here is the table of TTS pretrained models supported in the service." + from paddlespeech.cli.cls.infer import pretrained_models + logger.info( + "Here is the table of CLS pretrained models supported in the service." ) self.show_support_models(pretrained_models) - # show TTS static pretrained model - from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models - print( - "Here is the table of TTS static pretrained models supported in the service." + # show CLS static pretrained model + from paddlespeech.server.engine.cls.paddleinference.cls_engine import pretrained_models + logger.info( + "Here is the table of CLS static pretrained models supported in the service." ) self.show_support_models(pretrained_models) + return True except BaseException: - print( - "Failed to get the table of TTS pretrained models supported in the service." + logger.error( + "Failed to get the table of CLS pretrained models supported in the service." ) + return False From 89dbda58f6376bd80185586431b6675a07810336 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 10 Mar 2022 13:56:24 +0800 Subject: [PATCH 5/9] add cls static model, test=doc --- docs/source/released_model.md | 4 ++-- paddlespeech/server/engine/cls/paddleinference/cls_engine.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 8f855f7cf1e..0ece7c668da 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -67,9 +67,9 @@ GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/ ## Audio Classification Models -Model Type | Dataset| Example Link | Pretrained Models +Model Type | Dataset| Example Link | Pretrained Models | Static Models :-------------:| :------------:| :-----: | :-----: -PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) +PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) | [panns_cnn6_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz), [panns_cnn10_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz), [panns_cnn14_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz) PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn6.tar.gz), [esc50_cnn10.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn10.tar.gz), [esc50_cnn14.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn14.tar.gz) ## Punctuation Restoration Models diff --git a/paddlespeech/server/engine/cls/paddleinference/cls_engine.py b/paddlespeech/server/engine/cls/paddleinference/cls_engine.py index 9ccc1f78461..3982effd902 100644 --- a/paddlespeech/server/engine/cls/paddleinference/cls_engine.py 
+++ b/paddlespeech/server/engine/cls/paddleinference/cls_engine.py @@ -92,7 +92,6 @@ def _get_pretrained_path(self, tag: str) -> os.PathLike: res_path = os.path.join(MODEL_HOME, tag) decompressed_path = download_and_decompress(pretrained_models[tag], res_path) - print("aaaaaaaaaaaaa: ", decompressed_path) decompressed_path = os.path.abspath(decompressed_path) logger.info( 'Use pretrained model stored in: {}'.format(decompressed_path)) From 87ec33a647ef98b00be7356677e57d176c8cf185 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 10 Mar 2022 14:00:46 +0800 Subject: [PATCH 6/9] modify released_model.md, test=doc --- docs/source/released_model.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 0ece7c668da..32e1ec4ded1 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -59,7 +59,7 @@ WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tr ### Voice Cloning Model Type | Dataset| Example Link | Pretrained Models -:-------------:| :------------:| :-----: | :-----: +:-------------:| :------------:| :-----: | :-----: | GE2E| AISHELL-3, etc. |[ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e)|[ge2e_ckpt_0.3.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip) GE2E + Tactron2| AISHELL-3 |[ge2e-tactron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip) GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1)|[fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip) @@ -68,7 +68,7 @@ GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/ ## Audio Classification Models Model Type | Dataset| Example Link | Pretrained Models | Static Models -:-------------:| :------------:| :-----: | :-----: +:-------------:| :------------:| :-----: | :-----: | :-----: PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) | [panns_cnn6_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz), [panns_cnn10_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz), [panns_cnn14_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz) PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn6.tar.gz), [esc50_cnn10.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn10.tar.gz), [esc50_cnn14.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn14.tar.gz) From 8ef92a9495e5e85c30d0aac74f7a0964128c8c2d Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 10 Mar 2022 15:33:05 +0800 Subject: [PATCH 7/9] modify, test=doc --- demos/speech_server/cls_client.sh | 4 + docs/source/released_model.md | 2 +- 
.../server/bin/paddlespeech_client.py | 159 ++++++++---------- .../engine/tts/paddleinference/tts_engine.py | 2 +- .../server/engine/tts/python/tts_engine.py | 2 +- paddlespeech/server/restful/response.py | 3 +- paddlespeech/server/restful/tts_api.py | 3 +- 7 files changed, 79 insertions(+), 96 deletions(-) create mode 100644 demos/speech_server/cls_client.sh diff --git a/demos/speech_server/cls_client.sh b/demos/speech_server/cls_client.sh new file mode 100644 index 00000000000..5797aa204f6 --- /dev/null +++ b/demos/speech_server/cls_client.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --topk 1 diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 32e1ec4ded1..ffe721b8f28 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -69,7 +69,7 @@ GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/ Model Type | Dataset| Example Link | Pretrained Models | Static Models :-------------:| :------------:| :-----: | :-----: | :-----: -PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) | [panns_cnn6_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz), [panns_cnn10_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz), [panns_cnn14_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz) +PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) | [panns_cnn6_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz)(18M), [panns_cnn10_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz)(19M), [panns_cnn14_static.tar.gz](https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz)(289M) PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn6.tar.gz), [esc50_cnn10.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn10.tar.gz), [esc50_cnn14.tar.gz](https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn14.tar.gz) ## Punctuation Restoration Models diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 939779b7465..c13aadeb31d 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -70,13 +70,10 @@ def __init__(self): choices=[0, 8000, 16000], help='Sampling rate, the default is the same as the model') self.parser.add_argument( - '--output', - type=str, - default="./output.wav", - help='Synthesized audio file') + '--output', type=str, default=None, help='Synthesized audio file') - def postprocess(self, response_dict: dict, outfile: str) 
-> float: - wav_base64 = response_dict["result"]["audio"] + def postprocess(self, wav_base64: str, outfile: str) -> float: + #wav_base64 = response_dict["result"]["audio"] audio_data_byte = base64.b64decode(wav_base64) # from byte samples, sample_rate = soundfile.read( @@ -93,37 +90,38 @@ def postprocess(self, response_dict: dict, outfile: str) -> float: else: logger.error("The format for saving audio only supports wav or pcm") - duration = len(samples) / sample_rate - return duration - def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) - try: - url = 'http://' + args.server_ip + ":" + str( - args.port) + '/paddlespeech/tts' - request = { - "text": args.input, - "spk_id": args.spk_id, - "speed": args.speed, - "volume": args.volume, - "sample_rate": args.sample_rate, - "save_path": args.output - } - st = time.time() - response = requests.post(url, json.dumps(request)) - time_consume = time.time() - st - - response_dict = response.json() - duration = self.postprocess(response_dict, args.output) + input_ = args.input + server_ip = args.server_ip + port = args.port + spk_id = args.spk_id + speed = args.speed + volume = args.volume + sample_rate = args.sample_rate + output = args.output + try: + time_start = time.time() + res = self( + input=input_, + server_ip=server_ip, + port=port, + spk_id=spk_id, + speed=speed, + volume=volume, + sample_rate=sample_rate, + output=output) + time_end = time.time() + time_consume = time_end - time_start + response_dict = res.json() logger.info(response_dict["message"]) - logger.info("Save synthesized audio successfully on %s." % - (args.output)) - logger.info("Audio duration: %f s." % (duration)) + logger.info("Save synthesized audio successfully on %s." % (output)) + logger.info("Audio duration: %f s." % + (response_dict['result']['duration'])) logger.info("Response time: %f s." % (time_consume)) - return True - except BaseException: + except Exception as e: logger.error("Failed to synthesized audio.") return False @@ -136,7 +134,7 @@ def __call__(self, speed: float=1.0, volume: float=1.0, sample_rate: int=0, - output: str="./output.wav"): + output: str=None): """ Python API to call an executor. """ @@ -151,20 +149,11 @@ def __call__(self, "save_path": output } - try: - st = time.time() - response = requests.post(url, json.dumps(request)) - time_consume = time.time() - st - response_dict = response.json() - duration = self.postprocess(response_dict, output) - - print(response_dict["message"]) - print("Save synthesized audio successfully on %s." % (output)) - print("Audio duration: %f s." % (duration)) - print("Response time: %f s." 
% (time_consume)) - print("RTF: %f " % (time_consume / duration)) - except BaseException: - print("Failed to synthesized audio.") + res = requests.post(url, json.dumps(request)) + response_dict = res.json() + if not output: + self.postprocess(response_dict["result"]["audio"], output) + return res @cli_client_register( @@ -193,24 +182,27 @@ def __init__(self): def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) - url = 'http://' + args.server_ip + ":" + str( - args.port) + '/paddlespeech/asr' - audio = wav2base64(args.input) - data = { - "audio": audio, - "audio_format": args.audio_format, - "sample_rate": args.sample_rate, - "lang": args.lang, - } - time_start = time.time() + input_ = args.input + server_ip = args.server_ip + port = args.port + sample_rate = args.sample_rate + lang = args.lang + audio_format = args.audio_format + try: - r = requests.post(url=url, data=json.dumps(data)) - # ending Timestamp + time_start = time.time() + res = self( + input=input_, + server_ip=server_ip, + port=port, + sample_rate=sample_rate, + lang=lang, + audio_format=audio_format) time_end = time.time() - logger.info(r.json()) - logger.info("time cost %f s." % (time_end - time_start)) + logger.info(res.json()) + logger.info("Response time %f s." % (time_end - time_start)) return True - except BaseException: + except Exception as e: logger.error("Failed to speech recognition.") return False @@ -234,15 +226,9 @@ def __call__(self, "sample_rate": sample_rate, "lang": lang, } - time_start = time.time() - try: - r = requests.post(url=url, data=json.dumps(data)) - # ending Timestamp - time_end = time.time() - print(r.json()) - print("time cost %f s." % (time_end - time_start)) - except BaseException: - print("Failed to speech recognition.") + + res = requests.post(url=url, data=json.dumps(data)) + return res @cli_client_register( @@ -270,22 +256,19 @@ def __init__(self): def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) - url = 'http://' + args.server_ip + ":" + str( - args.port) + '/paddlespeech/cls' - audio = wav2base64(args.input) - data = { - "audio": audio, - "topk": args.topk, - } - time_start = time.time() + input_ = args.input + server_ip = args.server_ip + port = args.port + topk = args.topk + try: - r = requests.post(url=url, data=json.dumps(data)) - # ending Timestamp + time_start = time.time() + res = self(input=input_, server_ip=server_ip, port=port, topk=topk) time_end = time.time() - logger.info(r.json()) + logger.info(res.json()) logger.info("Response time %f s." % (time_end - time_start)) return True - except BaseException: + except Exception as e: logger.error("Failed to speech classification.") return False @@ -302,12 +285,6 @@ def __call__(self, url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/cls' audio = wav2base64(input) data = {"audio": audio, "topk": topk} - time_start = time.time() - try: - r = requests.post(url=url, data=json.dumps(data)) - # ending Timestamp - time_end = time.time() - print(r.json()) - print("Response time %f s." 
% (time_end - time_start)) - except BaseException: - print("Failed to speech classification.") + + res = requests.post(url=url, data=json.dumps(data)) + return res diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 6d23fb5be3f..db8813ba901 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -531,4 +531,4 @@ def run(self, postprocess_time)) logger.info("RTF: {}".format(rtf)) - return lang, target_sample_rate, wav_base64 + return lang, target_sample_rate, duration, wav_base64 diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index 8d6c7fd17e5..f153f60b966 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -250,4 +250,4 @@ def run(self, logger.info("RTF: {}".format(rtf)) logger.info("device: {}".format(self.device)) - return lang, target_sample_rate, wav_base64 + return lang, target_sample_rate, duration, wav_base64 diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index 9b647a63c49..ce3f34a772f 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -54,10 +54,11 @@ class ASRResponse(BaseModel): #****************************************************************************************/ class TTSResult(BaseModel): lang: str = "zh" - sample_rate: int spk_id: int = 0 speed: float = 1.0 volume: float = 1.0 + sample_rate: int + duration: float save_path: str = None audio: str diff --git a/paddlespeech/server/restful/tts_api.py b/paddlespeech/server/restful/tts_api.py index 0af0f6d0790..4e9bbe23ed3 100644 --- a/paddlespeech/server/restful/tts_api.py +++ b/paddlespeech/server/restful/tts_api.py @@ -98,7 +98,7 @@ def tts(request_body: TTSRequest): tts_engine = engine_pool['tts'] logger.info("Get tts engine successfully.") - lang, target_sample_rate, wav_base64 = tts_engine.run( + lang, target_sample_rate, duration, wav_base64 = tts_engine.run( text, spk_id, speed, volume, sample_rate, save_path) response = { @@ -113,6 +113,7 @@ def tts(request_body: TTSRequest): "speed": speed, "volume": volume, "sample_rate": target_sample_rate, + "duration": duration, "save_path": save_path, "audio": wav_base64 } From 77bad44e8bdd5d2e5de5a6ed558d88f331367b22 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 10 Mar 2022 15:54:28 +0800 Subject: [PATCH 8/9] modify readme, test=doc --- demos/speech_server/README.md | 22 ++++++++++++++-------- demos/speech_server/README_cn.md | 21 ++++++++++++++------- paddlespeech/server/restful/response.py | 4 +++- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 46d8f7c22cd..37a01698773 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -110,21 +110,22 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor + import json asrclient_executor = ASRClientExecutor() - asrclient_executor( + res = asrclient_executor( input="./zh.wav", server_ip="127.0.0.1", port=8090, sample_rate=16000, lang="zh_cn", audio_format="wav") + print(res.json()) ``` Output: ```bash {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': 
{'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - time cost 0.604353 s. ``` ### 5. TTS Client Usage @@ -146,7 +147,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0 - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0 - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0 - - `output`: Output wave filepath. Default: `output.wav`. + - `output`: Output wave filepath. Default: None, which means not to save the audio to the local. Output: ```bash @@ -160,9 +161,10 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor + import json ttsclient_executor = TTSClientExecutor() - ttsclient_executor( + res = ttsclient_executor( input="您好,欢迎使用百度飞桨语音合成服务。", server_ip="127.0.0.1", port=8090, @@ -171,6 +173,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee volume=1.0, sample_rate=0, output="./output.wav") + + response_dict = res.json() + print(response_dict["message"]) + print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path'])) + print("Audio duration: %f s." %(response_dict['result']['duration'])) ``` Output: @@ -178,7 +185,6 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee {'description': 'success.'} Save synthesized audio successfully on ./output.wav. Audio duration: 3.612500 s. - Response time: 0.388317 s. ``` @@ -211,20 +217,20 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor + import json clsclient_executor = CLSClientExecutor() - clsclient_executor( + res = clsclient_executor( input="./zh.wav", server_ip="127.0.0.1", port=8090, topk=1) - + print(res.jaon()) ``` Output: ```bash {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - Response time 0.150897 s. ``` diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 1328ca279c9..2a18fb40bfa 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -111,21 +111,22 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor + import json asrclient_executor = ASRClientExecutor() - asrclient_executor( + res = asrclient_executor( input="./zh.wav", server_ip="127.0.0.1", port=8090, sample_rate=16000, lang="zh_cn", audio_format="wav") + print(res.json()) ``` 输出: ```bash {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - time cost 0.604353 s. 
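```

Since the refactored executors in this patch now return the raw `requests.Response` (instead of printing inside the client), callers are expected to inspect the JSON themselves, as the README examples above do with `res.json()`. Below is a minimal sketch of that pattern; the `parse_response` helper and the example payload are illustrative only and are not part of the patch or the PaddleSpeech API.

```python
# Sketch: consume the dict returned by res.json() from the new client
# executors. Field names follow the documented response format
# ({'success': ..., 'code': ..., 'message': ..., 'result': ...}).
from typing import Any, Dict


def parse_response(response_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Return the `result` payload if the server reports success."""
    if not response_dict.get("success", False):
        raise RuntimeError("Server error %s: %s" % (
            response_dict.get("code"), response_dict.get("message")))
    return response_dict["result"]


if __name__ == "__main__":
    # Example payload shaped like the documented ASR response.
    example_asr_response = {
        "success": True,
        "code": 200,
        "message": {"description": "success"},
        "result": {"transcription": "我认为跑步最重要的就是给我带来了身体健康"},
    }
    print(parse_response(example_asr_response)["transcription"])
```

```bash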
``` @@ -150,7 +151,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - `speed`: 音频速度,该值应设置在 0 到 3 之间。 默认值:1.0 - `volume`: 音频音量,该值应设置在 0 到 3 之间。 默认值: 1.0 - `sample_rate`: 采样率,可选 [0, 8000, 16000],默认与模型相同。 默认值:0 - - `output`: 输出音频的路径, 默认值:output.wav。 + - `output`: 输出音频的路径, 默认值:None,表示不保存音频到本地。 输出: ```bash @@ -163,9 +164,10 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor + import json ttsclient_executor = TTSClientExecutor() - ttsclient_executor( + res = ttsclient_executor( input="您好,欢迎使用百度飞桨语音合成服务。", server_ip="127.0.0.1", port=8090, @@ -174,6 +176,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee volume=1.0, sample_rate=0, output="./output.wav") + + response_dict = res.json() + print(response_dict["message"]) + print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path'])) + print("Audio duration: %f s." %(response_dict['result']['duration'])) ``` 输出: @@ -181,7 +188,6 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee {'description': 'success.'} Save synthesized audio successfully on ./output.wav. Audio duration: 3.612500 s. - Response time: 0.388317 s. ``` @@ -214,20 +220,21 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - Python API ```python from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor + import json clsclient_executor = CLSClientExecutor() - clsclient_executor( + res = clsclient_executor( input="./zh.wav", server_ip="127.0.0.1", port=8090, topk=1) + print(res.jaon()) ``` 输出: ```bash {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - Response time 0.150897 s. 
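```

For the CLS response shown above, the classification scores live in `result['results']` as a list of `{class_name, prob}` entries. A hedged sketch of turning that list into labelled scores follows; `topk_labels` is an illustrative helper name, not part of the patch.

```python
# Sketch: flatten the documented CLS result into (label, probability) pairs,
# sorted by probability. The input shape mirrors the README output above.
from typing import Dict, List, Tuple


def topk_labels(cls_result: Dict) -> List[Tuple[str, float]]:
    """Return (class_name, prob) pairs sorted by probability, descending."""
    pairs = [(item["class_name"], item["prob"])
             for item in cls_result.get("results", [])]
    return sorted(pairs, key=lambda p: p[1], reverse=True)


if __name__ == "__main__":
    example = {"topk": 1,
               "results": [{"class_name": "Speech", "prob": 0.9027}]}
    for name, prob in topk_labels(example):
        print("%s: %.4f" % (name, prob))
```

```bash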
``` diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index ce3f34a772f..a2a207e4f68 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -74,9 +74,11 @@ class TTSResponse(BaseModel): }, "result": { "lang": "zh", - "sample_rate": 24000, + "spk_id": 0, "speed": 1.0, "volume": 1.0, + "sample_rate": 24000, + "duration": 3.6125, "audio": "LTI1OTIuNjI1OTUwMzQsOTk2OS41NDk4...", "save_path": "./tts.wav" } From 89457b273afacc94fa8e9cefa470e62e3c3a45b9 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 10 Mar 2022 18:58:27 +0800 Subject: [PATCH 9/9] modify, test=doc --- demos/speech_server/README.md | 2 +- demos/speech_server/README_cn.md | 2 +- paddlespeech/server/bin/paddlespeech_client.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 37a01698773..10489e71314 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -225,7 +225,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee server_ip="127.0.0.1", port=8090, topk=1) - print(res.jaon()) + print(res.json()) ``` Output: diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 2a18fb40bfa..2bd8af6c91f 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -228,7 +228,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee server_ip="127.0.0.1", port=8090, topk=1) - print(res.jaon()) + print(res.json()) ``` diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index c13aadeb31d..40f17c63c8e 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -73,7 +73,6 @@ def __init__(self): '--output', type=str, default=None, help='Synthesized audio file') def postprocess(self, wav_base64: str, outfile: str) -> float: - #wav_base64 = response_dict["result"]["audio"] audio_data_byte = base64.b64decode(wav_base64) # from byte samples, sample_rate = soundfile.read(
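
With `duration` now computed inside the TTS engines and echoed in `TTSResponse`, the client no longer has to derive it, but the value is simply the number of samples divided by the sample rate, which is what the removed client-side postprocess computed. A small sketch under that assumption, taking the base64 `audio` field from the response (the helper name is illustrative):

```python
# Sketch: recover duration (seconds) from the base64 `audio` field of a TTS
# response. Assumes the payload decodes to audio that soundfile can read;
# this mirrors the len(samples) / sample_rate computation dropped from the
# client postprocess in this patch series.
import base64
import io

import soundfile


def audio_duration_from_base64(wav_base64: str) -> float:
    audio_bytes = base64.b64decode(wav_base64)
    samples, sample_rate = soundfile.read(
        io.BytesIO(audio_bytes), dtype="float32")
    return len(samples) / sample_rate
```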