From 6829588e1a2a149a1ae55169d4627f0752146fbe Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 09:52:02 +0000 Subject: [PATCH 01/12] remove paddle_serving_server_gpu --- python/CMakeLists.txt | 162 ++-- python/gen_version.py | 2 +- python/paddle_serving_server/__init__.py | 737 +----------------- python/paddle_serving_server/dag.py | 97 +++ python/paddle_serving_server/monitor.py | 1 - python/paddle_serving_server/rpc_service.py | 160 ++++ python/paddle_serving_server/serve.py | 154 +++- .../server.py} | 364 +-------- python/paddle_serving_server/version.py | 18 - python/paddle_serving_server/web_service.py | 140 +++- python/paddle_serving_server_gpu/monitor.py | 504 ------------ python/paddle_serving_server_gpu/serve.py | 261 ------- python/paddle_serving_server_gpu/version.py | 19 - .../paddle_serving_server_gpu/web_service.py | 310 -------- 14 files changed, 569 insertions(+), 2360 deletions(-) create mode 100644 python/paddle_serving_server/dag.py create mode 100644 python/paddle_serving_server/rpc_service.py rename python/{paddle_serving_server_gpu/__init__.py => paddle_serving_server/server.py} (61%) delete mode 100644 python/paddle_serving_server/version.py delete mode 100644 python/paddle_serving_server_gpu/monitor.py delete mode 100644 python/paddle_serving_server_gpu/serve.py delete mode 100644 python/paddle_serving_server_gpu/version.py delete mode 100644 python/paddle_serving_server_gpu/web_service.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a44027d90..924c79794 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,46 +1,36 @@ if (CLIENT) - file(INSTALL pipeline DESTINATION paddle_serving_client) - file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) - set(PY_FILES ${SERVING_CLIENT_PY_FILES}) - SET(PACKAGE_NAME "serving_client") - set(SETUP_LOG_FILE "setup.py.client.log") + file(INSTALL pipeline DESTINATION paddle_serving_client) + file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) + set(PY_FILES ${SERVING_CLIENT_PY_FILES}) + SET(PACKAGE_NAME "serving_client") + set(SETUP_LOG_FILE "setup.py.client.log") endif() if (SERVER) - if (NOT WITH_GPU AND NOT WITH_LITE) - file(INSTALL pipeline DESTINATION paddle_serving_server) - file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) - else() - file(INSTALL pipeline DESTINATION paddle_serving_server_gpu) - file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py) - endif() - set(PY_FILES ${SERVING_SERVER_PY_FILES}) - SET(PACKAGE_NAME "serving_server") - set(SETUP_LOG_FILE "setup.py.server.log") + file(INSTALL pipeline DESTINATION paddle_serving_server) + file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) + set(PY_FILES ${SERVING_SERVER_PY_FILES}) + SET(PACKAGE_NAME "serving_server") + set(SETUP_LOG_FILE "setup.py.server.log") endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/util.py ${CMAKE_CURRENT_BINARY_DIR}/util.py) if (CLIENT) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py ${CMAKE_CURRENT_BINARY_DIR}/python_tag.py) endif() if (APP) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) endif() if (SERVER) - if (NOT WITH_GPU AND NOT WITH_LITE) - 
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in - ${CMAKE_CURRENT_BINARY_DIR}/setup.py) - else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in - ${CMAKE_CURRENT_BINARY_DIR}/setup.py) - endif() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py @@ -50,108 +40,66 @@ set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so) message("python env: " ${py_env}) if (APP) -add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app" - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES}) -add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) + add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app" + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES}) + add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) endif() if (CLIENT) -add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so + add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client" - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES}) -add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES}) + add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) endif() if (SERVER) - if(NOT WITH_GPU AND NOT WITH_LITE) - add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "server" - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) - add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) - elseif(WITH_TRT) - if(CUDA_VERSION EQUAL 10.1) - set(SUFFIX 101) - elseif(CUDA_VERSION EQUAL 10.2) - set(SUFFIX 102) - elseif(CUDA_VERSION EQUAL 11.0) - set(SUFFIX 11) + if(CUDA_VERSION EQUAL 10.1) + set(SUFFIX 101) + elseif(CUDA_VERSION EQUAL 10.2) + set(SUFFIX 102) + elseif(CUDA_VERSION EQUAL 11.0) + set(SUFFIX 11) + 
endif() - endif() - add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r - ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py - "server_gpu" ${SUFFIX} - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) - add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) - elseif(WITH_LITE) - if(WITH_XPU) - add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r - ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py - "server_gpu" arm-xpu - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) - add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) - else() - add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r - ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py - "server_gpu" arm - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) - add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) - endif() - else() - add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r - ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py - "server_gpu" ${CUDA_VERSION_MAJOR} - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) - add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) - endif() + add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r + ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py + "server" ${VERSION_SUFFIX} + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) + add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) endif() set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) if (CLIENT) -install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR} + install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR} DESTINATION opt/serving_client/share/wheels -) + ) endif() if (SERVER) -install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR} - DESTINATION opt/serving_server/share/wheels -) + install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR} + DESTINATION opt/serving_server/share/wheels + ) endif() if (CLIENT OR SERVER) -find_program(PATCHELF_EXECUTABLE patchelf) -if (NOT PATCHELF_EXECUTABLE) - message(FATAL_ERROR "patchelf not found, please install it.\n" - "For Ubuntu, the command is: apt-get install -y patchelf.") -endif() + find_program(PATCHELF_EXECUTABLE patchelf) + if (NOT PATCHELF_EXECUTABLE) + message(FATAL_ERROR "patchelf not found, please install it.\n" + "For Ubuntu, the command is: apt-get install -y patchelf.") + endif() 
endif() diff --git a/python/gen_version.py b/python/gen_version.py index 258905f58..a13c52774 100644 --- a/python/gen_version.py +++ b/python/gen_version.py @@ -35,7 +35,7 @@ def update_info(file_name, feature, info): if len(sys.argv) > 2: - update_info("paddle_serving_server_gpu/version.py", "cuda_version", + update_info("paddle_serving_server/version.py", "cuda_version", sys.argv[2]) path = "paddle_serving_" + sys.argv[1] diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 4d0832b67..102695c88 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -13,737 +13,8 @@ # limitations under the License. # pylint: disable=doc-string-missing -import os -from .proto import server_configure_pb2 as server_sdk -from .proto import general_model_config_pb2 as m_config -import google.protobuf.text_format -import tarfile -import socket -import paddle_serving_server as paddle_serving_server -from .version import serving_server_version -from contextlib import closing -import collections -import shutil -import numpy as np -import grpc -from .proto import multi_lang_general_model_service_pb2 -import sys -if sys.platform.startswith('win') is False: - import fcntl -sys.path.append( - os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) -from .proto import multi_lang_general_model_service_pb2_grpc -from multiprocessing import Pool, Process -from concurrent import futures +SERVER_VERSION = "0.0.0" - -class OpMaker(object): - def __init__(self): - self.op_dict = { - "general_infer": "GeneralInferOp", - "general_reader": "GeneralReaderOp", - "general_response": "GeneralResponseOp", - "general_text_reader": "GeneralTextReaderOp", - "general_text_response": "GeneralTextResponseOp", - "general_single_kv": "GeneralSingleKVOp", - "general_dist_kv_infer": "GeneralDistKVInferOp", - "general_dist_kv_quant_infer": "GeneralDistKVQuantInferOp", - "general_copy": "GeneralCopyOp" - } - self.node_name_suffix_ = collections.defaultdict(int) - - def create(self, node_type, engine_name=None, inputs=[], outputs=[]): - if node_type not in self.op_dict: - raise Exception("Op type {} is not supported right now".format( - node_type)) - node = server_sdk.DAGNode() - # node.name will be used as the infer engine name - if engine_name: - node.name = engine_name - else: - node.name = '{}_{}'.format(node_type, - self.node_name_suffix_[node_type]) - self.node_name_suffix_[node_type] += 1 - - node.type = self.op_dict[node_type] - if inputs: - for dep_node_str in inputs: - dep_node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(dep_node_str, dep_node) - dep = server_sdk.DAGNodeDependency() - dep.name = dep_node.name - dep.mode = "RO" - node.dependencies.extend([dep]) - # Because the return value will be used as the key value of the - # dict, and the proto object is variable which cannot be hashed, - # so it is processed into a string. This has little effect on - # overall efficiency. - return google.protobuf.text_format.MessageToString(node) - - -class OpSeqMaker(object): - def __init__(self): - self.workflow = server_sdk.Workflow() - self.workflow.name = "workflow1" - self.workflow.workflow_type = "Sequence" - - def add_op(self, node_str): - node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(node_str, node) - if len(node.dependencies) > 1: - raise Exception( - 'Set more than one predecessor for op in OpSeqMaker is not allowed.' 
- ) - if len(self.workflow.nodes) >= 1: - if len(node.dependencies) == 0: - dep = server_sdk.DAGNodeDependency() - dep.name = self.workflow.nodes[-1].name - dep.mode = "RO" - node.dependencies.extend([dep]) - elif len(node.dependencies) == 1: - if node.dependencies[0].name != self.workflow.nodes[-1].name: - raise Exception( - 'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.' - .format(node.dependencies[0].name, self.workflow.nodes[ - -1].name)) - self.workflow.nodes.extend([node]) - - def get_op_sequence(self): - workflow_conf = server_sdk.WorkflowConf() - workflow_conf.workflows.extend([self.workflow]) - return workflow_conf - - -class OpGraphMaker(object): - def __init__(self): - self.workflow = server_sdk.Workflow() - self.workflow.name = "workflow1" - # Currently, SDK only supports "Sequence" - self.workflow.workflow_type = "Sequence" - - def add_op(self, node_str): - node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(node_str, node) - self.workflow.nodes.extend([node]) - - def get_op_graph(self): - workflow_conf = server_sdk.WorkflowConf() - workflow_conf.workflows.extend([self.workflow]) - return workflow_conf - - -class Server(object): - def __init__(self): - self.server_handle_ = None - self.infer_service_conf = None - self.model_toolkit_conf = None - self.resource_conf = None - self.memory_optimization = False - self.ir_optimization = False - self.model_conf = None - self.workflow_fn = "workflow.prototxt" - self.resource_fn = "resource.prototxt" - self.infer_service_fn = "infer_service.prototxt" - self.model_toolkit_fn = "model_toolkit.prototxt" - self.general_model_config_fn = "general_model.prototxt" - self.cube_config_fn = "cube.conf" - self.workdir = "" - self.max_concurrency = 0 - self.num_threads = 4 - self.port = 8080 - self.reload_interval_s = 10 - self.max_body_size = 64 * 1024 * 1024 - self.module_path = os.path.dirname(paddle_serving_server.__file__) - self.cur_path = os.getcwd() - self.use_local_bin = False - self.mkl_flag = False - self.encryption_model = False - self.product_name = None - self.container_id = None - self.model_config_paths = None # for multi-model in a workflow - - def get_fetch_list(self): - fetch_names = [var.alias_name for var in self.model_conf.fetch_var] - return fetch_names - - def set_max_concurrency(self, concurrency): - self.max_concurrency = concurrency - - def set_num_threads(self, threads): - self.num_threads = threads - - def set_max_body_size(self, body_size): - if body_size >= self.max_body_size: - self.max_body_size = body_size - else: - print( - "max_body_size is less than default value, will use default value in service." 
- ) - - def set_port(self, port): - self.port = port - - def set_reload_interval(self, interval): - self.reload_interval_s = interval - - def set_op_sequence(self, op_seq): - self.workflow_conf = op_seq - - def set_op_graph(self, op_graph): - self.workflow_conf = op_graph - - def set_memory_optimize(self, flag=False): - self.memory_optimization = flag - - def set_ir_optimize(self, flag=False): - self.ir_optimization = flag - - def use_encryption_model(self, flag=False): - self.encryption_model = flag - - def set_product_name(self, product_name=None): - if product_name == None: - raise ValueError("product_name can't be None.") - self.product_name = product_name - - def set_container_id(self, container_id): - if container_id == None: - raise ValueError("container_id can't be None.") - self.container_id = container_id - - def check_local_bin(self): - if "SERVING_BIN" in os.environ: - self.use_local_bin = True - self.bin_path = os.environ["SERVING_BIN"] - - def _prepare_engine(self, model_config_paths, device): - if self.model_toolkit_conf == None: - self.model_toolkit_conf = server_sdk.ModelToolkitConf() - - for engine_name, model_config_path in model_config_paths.items(): - engine = server_sdk.EngineDesc() - engine.name = engine_name - engine.reloadable_meta = model_config_path + "/fluid_time_file" - os.system("touch {}".format(engine.reloadable_meta)) - engine.reloadable_type = "timestamp_ne" - engine.runtime_thread_num = 0 - engine.batch_infer_size = 0 - engine.enable_batch_align = 0 - engine.model_data_path = model_config_path - engine.enable_memory_optimization = self.memory_optimization - engine.enable_ir_optimization = self.ir_optimization - engine.static_optimization = False - engine.force_update_static_cache = False - if os.path.exists('{}/__params__'.format(model_config_path)): - suffix = "" - else: - suffix = "_DIR" - - if device == "cpu": - if self.encryption_model: - engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT" - else: - engine.type = "FLUID_CPU_ANALYSIS" + suffix - elif device == "gpu": - if self.encryption_model: - engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT" - else: - engine.type = "FLUID_GPU_ANALYSIS" + suffix - - self.model_toolkit_conf.engines.extend([engine]) - - def _prepare_infer_service(self, port): - if self.infer_service_conf == None: - self.infer_service_conf = server_sdk.InferServiceConf() - self.infer_service_conf.port = port - infer_service = server_sdk.InferService() - infer_service.name = "GeneralModelService" - infer_service.workflows.extend(["workflow1"]) - self.infer_service_conf.services.extend([infer_service]) - - def _prepare_resource(self, workdir, cube_conf): - self.workdir = workdir - if self.resource_conf == None: - with open("{}/{}".format(workdir, self.general_model_config_fn), - "w") as fout: - fout.write(str(self.model_conf)) - self.resource_conf = server_sdk.ResourceConf() - for workflow in self.workflow_conf.workflows: - for node in workflow.nodes: - if "dist_kv" in node.name: - self.resource_conf.cube_config_path = workdir - self.resource_conf.cube_config_file = self.cube_config_fn - if cube_conf == None: - raise ValueError( - "Please set the path of cube.conf while use dist_kv op." 
- ) - shutil.copy(cube_conf, workdir) - if "quant" in node.name: - self.resource_conf.cube_quant_bits = 8 - self.resource_conf.model_toolkit_path = workdir - self.resource_conf.model_toolkit_file = self.model_toolkit_fn - self.resource_conf.general_model_path = workdir - self.resource_conf.general_model_file = self.general_model_config_fn - if self.product_name != None: - self.resource_conf.auth_product_name = self.product_name - if self.container_id != None: - self.resource_conf.auth_container_id = self.container_id - - def _write_pb_str(self, filepath, pb_obj): - with open(filepath, "w") as fout: - fout.write(str(pb_obj)) - - def load_model_config(self, model_config_paths): - # At present, Serving needs to configure the model path in - # the resource.prototxt file to determine the input and output - # format of the workflow. To ensure that the input and output - # of multiple models are the same. - workflow_oi_config_path = None - if isinstance(model_config_paths, str): - # If there is only one model path, use the default infer_op. - # Because there are several infer_op type, we need to find - # it from workflow_conf. - default_engine_names = [ - 'general_infer_0', 'general_dist_kv_infer_0', - 'general_dist_kv_quant_infer_0' - ] - engine_name = None - for node in self.workflow_conf.workflows[0].nodes: - if node.name in default_engine_names: - engine_name = node.name - break - if engine_name is None: - raise Exception( - "You have set the engine_name of Op. Please use the form {op: model_path} to configure model path" - ) - self.model_config_paths = {engine_name: model_config_paths} - workflow_oi_config_path = self.model_config_paths[engine_name] - elif isinstance(model_config_paths, dict): - self.model_config_paths = {} - for node_str, path in model_config_paths.items(): - node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(node_str, node) - self.model_config_paths[node.name] = path - print("You have specified multiple model paths, please ensure " - "that the input and output of multiple models are the same.") - workflow_oi_config_path = list(self.model_config_paths.items())[0][ - 1] - else: - raise Exception("The type of model_config_paths must be str or " - "dict({op: model_path}), not {}.".format( - type(model_config_paths))) - - self.model_conf = m_config.GeneralModelConfig() - f = open( - "{}/serving_server_conf.prototxt".format(workflow_oi_config_path), - 'r') - self.model_conf = google.protobuf.text_format.Merge( - str(f.read()), self.model_conf) - # check config here - # print config here - - def use_mkl(self, flag): - self.mkl_flag = flag - - def get_device_version(self): - avx_flag = False - mkl_flag = self.mkl_flag - openblas_flag = False - r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1") - if r == 0: - avx_flag = True - if avx_flag: - if mkl_flag: - device_version = "serving-cpu-avx-mkl-" - else: - device_version = "serving-cpu-avx-openblas-" - else: - if mkl_flag: - print( - "Your CPU does not support AVX, server will running with noavx-openblas mode." 
- ) - device_version = "serving-cpu-noavx-openblas-" - return device_version - - def download_bin(self): - os.chdir(self.module_path) - need_download = False - device_version = self.get_device_version() - folder_name = device_version + serving_server_version - tar_name = folder_name + ".tar.gz" - bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name - self.server_path = os.path.join(self.module_path, folder_name) - - #acquire lock - version_file = open("{}/version.py".format(self.module_path), "r") - fcntl.flock(version_file, fcntl.LOCK_EX) - - if not os.path.exists(self.server_path): - print('Frist time run, downloading PaddleServing components ...') - r = os.system('wget ' + bin_url + ' --no-check-certificate') - if r != 0: - if os.path.exists(tar_name): - os.remove(tar_name) - raise SystemExit( - 'Download failed, please check your network or permission of {}.' - .format(self.module_path)) - else: - try: - print('Decompressing files ..') - tar = tarfile.open(tar_name) - tar.extractall() - tar.close() - except: - if os.path.exists(exe_path): - os.remove(exe_path) - raise SystemExit( - 'Decompressing failed, please check your permission of {} or disk space left.' - .format(self.module_path)) - finally: - os.remove(tar_name) - #release lock - version_file.close() - os.chdir(self.cur_path) - self.bin_path = self.server_path + "/serving" - - def prepare_server(self, - workdir=None, - port=9292, - device="cpu", - cube_conf=None): - if workdir == None: - workdir = "./tmp" - os.system("mkdir {}".format(workdir)) - else: - os.system("mkdir {}".format(workdir)) - os.system("touch {}/fluid_time_file".format(workdir)) - - if not self.port_is_available(port): - raise SystemExit("Port {} is already used".format(port)) - self.set_port(port) - self._prepare_resource(workdir, cube_conf) - self._prepare_engine(self.model_config_paths, device) - self._prepare_infer_service(port) - self.workdir = workdir - - infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn) - workflow_fn = "{}/{}".format(workdir, self.workflow_fn) - resource_fn = "{}/{}".format(workdir, self.resource_fn) - model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn) - - self._write_pb_str(infer_service_fn, self.infer_service_conf) - self._write_pb_str(workflow_fn, self.workflow_conf) - self._write_pb_str(resource_fn, self.resource_conf) - self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf) - - def port_is_available(self, port): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex(('0.0.0.0', port)) - if result != 0: - return True - else: - return False - - def run_server(self): - # just run server with system command - # currently we do not load cube - self.check_local_bin() - if not self.use_local_bin: - self.download_bin() - else: - print("Use local bin : {}".format(self.bin_path)) - command = "{} " \ - "-enable_model_toolkit " \ - "-inferservice_path {} " \ - "-inferservice_file {} " \ - "-max_concurrency {} " \ - "-num_threads {} " \ - "-port {} " \ - "-reload_interval_s {} " \ - "-resource_path {} " \ - "-resource_file {} " \ - "-workflow_path {} " \ - "-workflow_file {} " \ - "-bthread_concurrency {} " \ - "-max_body_size {} ".format( - self.bin_path, - self.workdir, - self.infer_service_fn, - self.max_concurrency, - self.num_threads, - self.port, - self.reload_interval_s, - self.workdir, - self.resource_fn, - self.workdir, - self.workflow_fn, - self.num_threads, - self.max_body_size) - print("Going to Run Command") - 
print(command) - os.system(command) - - -class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. - MultiLangGeneralModelServiceServicer): - def __init__(self, model_config_path, is_multi_model, endpoints): - self.is_multi_model_ = is_multi_model - self.model_config_path_ = model_config_path - self.endpoints_ = endpoints - with open(self.model_config_path_) as f: - self.model_config_str_ = str(f.read()) - self._parse_model_config(self.model_config_str_) - self._init_bclient(self.model_config_path_, self.endpoints_) - - def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): - from paddle_serving_client import Client - self.bclient_ = Client() - if timeout_ms is not None: - self.bclient_.set_rpc_timeout_ms(timeout_ms) - self.bclient_.load_client_config(model_config_path) - self.bclient_.connect(endpoints) - - def _parse_model_config(self, model_config_str): - model_conf = m_config.GeneralModelConfig() - model_conf = google.protobuf.text_format.Merge(model_config_str, - model_conf) - self.feed_names_ = [var.alias_name for var in model_conf.feed_var] - self.feed_types_ = {} - self.feed_shapes_ = {} - self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] - self.fetch_types_ = {} - self.lod_tensor_set_ = set() - for i, var in enumerate(model_conf.feed_var): - self.feed_types_[var.alias_name] = var.feed_type - self.feed_shapes_[var.alias_name] = var.shape - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - for i, var in enumerate(model_conf.fetch_var): - self.fetch_types_[var.alias_name] = var.fetch_type - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - - def _flatten_list(self, nested_list): - for item in nested_list: - if isinstance(item, (list, tuple)): - for sub_item in self._flatten_list(item): - yield sub_item - else: - yield item - - def _unpack_inference_request(self, request): - feed_names = list(request.feed_var_names) - fetch_names = list(request.fetch_var_names) - is_python = request.is_python - log_id = request.log_id - feed_batch = [] - for feed_inst in request.insts: - feed_dict = {} - for idx, name in enumerate(feed_names): - var = feed_inst.tensor_array[idx] - v_type = self.feed_types_[name] - data = None - if is_python: - if v_type == 0: # int64 - data = np.frombuffer(var.data, dtype="int64") - elif v_type == 1: # float32 - data = np.frombuffer(var.data, dtype="float32") - elif v_type == 2: # int32 - data = np.frombuffer(var.data, dtype="int32") - else: - raise Exception("error type.") - else: - if v_type == 0: # int64 - data = np.array(list(var.int64_data), dtype="int64") - elif v_type == 1: # float32 - data = np.array(list(var.float_data), dtype="float32") - elif v_type == 2: # int32 - data = np.array(list(var.int_data), dtype="int32") - else: - raise Exception("error type.") - data.shape = list(feed_inst.tensor_array[idx].shape) - feed_dict[name] = data - if len(var.lod) > 0: - feed_dict["{}.lod".format(name)] = var.lod - feed_batch.append(feed_dict) - return feed_batch, fetch_names, is_python, log_id - - def _pack_inference_response(self, ret, fetch_names, is_python): - resp = multi_lang_general_model_service_pb2.InferenceResponse() - if ret is None: - resp.err_code = 1 - return resp - results, tag = ret - resp.tag = tag - resp.err_code = 0 - if not self.is_multi_model_: - results = {'general_infer_0': results} - for model_name, model_result in results.items(): - model_output = multi_lang_general_model_service_pb2.ModelOutput() - inst = multi_lang_general_model_service_pb2.FetchInst() 
- for idx, name in enumerate(fetch_names): - tensor = multi_lang_general_model_service_pb2.Tensor() - v_type = self.fetch_types_[name] - if is_python: - tensor.data = model_result[name].tobytes() - else: - if v_type == 0: # int64 - tensor.int64_data.extend(model_result[name].reshape(-1) - .tolist()) - elif v_type == 1: # float32 - tensor.float_data.extend(model_result[name].reshape(-1) - .tolist()) - elif v_type == 2: # int32 - tensor.int_data.extend(model_result[name].reshape(-1) - .tolist()) - else: - raise Exception("error type.") - tensor.shape.extend(list(model_result[name].shape)) - if "{}.lod".format(name) in model_result: - tensor.lod.extend(model_result["{}.lod".format(name)] - .tolist()) - inst.tensor_array.append(tensor) - model_output.insts.append(inst) - model_output.engine_name = model_name - resp.outputs.append(model_output) - return resp - - def SetTimeout(self, request, context): - # This porcess and Inference process cannot be operate at the same time. - # For performance reasons, do not add thread lock temporarily. - timeout_ms = request.timeout_ms - self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) - resp = multi_lang_general_model_service_pb2.SimpleResponse() - resp.err_code = 0 - return resp - - def Inference(self, request, context): - feed_batch, fetch_names, is_python, log_id = \ - self._unpack_inference_request(request) - ret = self.bclient_.predict( - feed=feed_batch, - fetch=fetch_names, - batch=True, - need_variant_tag=True, - log_id=log_id) - return self._pack_inference_response(ret, fetch_names, is_python) - - def GetClientConfig(self, request, context): - resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() - resp.client_config_str = self.model_config_str_ - return resp - - -class MultiLangServer(object): - def __init__(self): - self.bserver_ = Server() - self.worker_num_ = 4 - self.body_size_ = 64 * 1024 * 1024 - self.concurrency_ = 100000 - self.is_multi_model_ = False # for model ensemble - - def set_max_concurrency(self, concurrency): - self.concurrency_ = concurrency - self.bserver_.set_max_concurrency(concurrency) - - def set_num_threads(self, threads): - self.worker_num_ = threads - self.bserver_.set_num_threads(threads) - - def set_max_body_size(self, body_size): - self.bserver_.set_max_body_size(body_size) - if body_size >= self.body_size_: - self.body_size_ = body_size - else: - print( - "max_body_size is less than default value, will use default value in service." 
- ) - - def use_encryption_model(self, flag=False): - self.encryption_model = flag - - def set_port(self, port): - self.gport_ = port - - def set_reload_interval(self, interval): - self.bserver_.set_reload_interval(interval) - - def set_op_sequence(self, op_seq): - self.bserver_.set_op_sequence(op_seq) - - def set_op_graph(self, op_graph): - self.bserver_.set_op_graph(op_graph) - - def set_memory_optimize(self, flag=False): - self.bserver_.set_memory_optimize(flag) - - def set_ir_optimize(self, flag=False): - self.bserver_.set_ir_optimize(flag) - - def set_op_sequence(self, op_seq): - self.bserver_.set_op_sequence(op_seq) - - def use_mkl(self, flag): - self.bserver_.use_mkl(flag) - - def load_model_config(self, server_config_paths, client_config_path=None): - self.bserver_.load_model_config(server_config_paths) - if client_config_path is None: - if isinstance(server_config_paths, dict): - self.is_multi_model_ = True - client_config_path = '{}/serving_server_conf.prototxt'.format( - list(server_config_paths.items())[0][1]) - else: - client_config_path = '{}/serving_server_conf.prototxt'.format( - server_config_paths) - self.bclient_config_path_ = client_config_path - - def prepare_server(self, - workdir=None, - port=9292, - device="cpu", - cube_conf=None): - if not self._port_is_available(port): - raise SystemExit("Prot {} is already used".format(port)) - default_port = 12000 - self.port_list_ = [] - for i in range(1000): - if default_port + i != port and self._port_is_available(default_port - + i): - self.port_list_.append(default_port + i) - break - self.bserver_.prepare_server( - workdir=workdir, - port=self.port_list_[0], - device=device, - cube_conf=cube_conf) - self.set_port(port) - - def _launch_brpc_service(self, bserver): - bserver.run_server() - - def _port_is_available(self, port): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex(('0.0.0.0', port)) - return result != 0 - - def run_server(self): - p_bserver = Process( - target=self._launch_brpc_service, args=(self.bserver_, )) - p_bserver.start() - options = [('grpc.max_send_message_length', self.body_size_), - ('grpc.max_receive_message_length', self.body_size_)] - server = grpc.server( - futures.ThreadPoolExecutor(max_workers=self.worker_num_), - options=options, - maximum_concurrent_rpcs=self.concurrency_) - multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server( - MultiLangServerServiceServicer( - self.bclient_config_path_, self.is_multi_model_, - ["0.0.0.0:{}".format(self.port_list_[0])]), server) - server.add_insecure_port('[::]:{}'.format(self.gport_)) - server.start() - p_bserver.join() - server.wait_for_termination() +__version__ = SERVER_VERSION +cuda_version = "9" +commit_id = "" \ No newline at end of file diff --git a/python/paddle_serving_server/dag.py b/python/paddle_serving_server/dag.py new file mode 100644 index 000000000..f0b410d61 --- /dev/null +++ b/python/paddle_serving_server/dag.py @@ -0,0 +1,97 @@ + +from .proto import server_configure_pb2 as server_sdk +import google.protobuf.text_format +import collections + +class OpMaker(object): + def __init__(self): + self.op_dict = { + "general_infer": "GeneralInferOp", + "general_reader": "GeneralReaderOp", + "general_response": "GeneralResponseOp", + "general_text_reader": "GeneralTextReaderOp", + "general_text_response": "GeneralTextResponseOp", + "general_single_kv": "GeneralSingleKVOp", + "general_dist_kv_infer": "GeneralDistKVInferOp", + 
"general_dist_kv": "GeneralDistKVOp" + } + self.node_name_suffix_ = collections.defaultdict(int) + + def create(self, node_type, engine_name=None, inputs=[], outputs=[]): + if node_type not in self.op_dict: + raise Exception("Op type {} is not supported right now".format( + node_type)) + node = server_sdk.DAGNode() + # node.name will be used as the infer engine name + if engine_name: + node.name = engine_name + else: + node.name = '{}_{}'.format(node_type, + self.node_name_suffix_[node_type]) + self.node_name_suffix_[node_type] += 1 + + node.type = self.op_dict[node_type] + if inputs: + for dep_node_str in inputs: + dep_node = server_sdk.DAGNode() + google.protobuf.text_format.Parse(dep_node_str, dep_node) + dep = server_sdk.DAGNodeDependency() + dep.name = dep_node.name + dep.mode = "RO" + node.dependencies.extend([dep]) + # Because the return value will be used as the key value of the + # dict, and the proto object is variable which cannot be hashed, + # so it is processed into a string. This has little effect on + # overall efficiency. + return google.protobuf.text_format.MessageToString(node) + + +class OpSeqMaker(object): + def __init__(self): + self.workflow = server_sdk.Workflow() + self.workflow.name = "workflow1" + self.workflow.workflow_type = "Sequence" + + def add_op(self, node_str): + node = server_sdk.DAGNode() + google.protobuf.text_format.Parse(node_str, node) + if len(node.dependencies) > 1: + raise Exception( + 'Set more than one predecessor for op in OpSeqMaker is not allowed.' + ) + if len(self.workflow.nodes) >= 1: + if len(node.dependencies) == 0: + dep = server_sdk.DAGNodeDependency() + dep.name = self.workflow.nodes[-1].name + dep.mode = "RO" + node.dependencies.extend([dep]) + elif len(node.dependencies) == 1: + if node.dependencies[0].name != self.workflow.nodes[-1].name: + raise Exception( + 'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.' + .format(node.dependencies[0].name, self.workflow.nodes[ + -1].name)) + self.workflow.nodes.extend([node]) + + def get_op_sequence(self): + workflow_conf = server_sdk.WorkflowConf() + workflow_conf.workflows.extend([self.workflow]) + return workflow_conf + + +class OpGraphMaker(object): + def __init__(self): + self.workflow = server_sdk.Workflow() + self.workflow.name = "workflow1" + # Currently, SDK only supports "Sequence" + self.workflow.workflow_type = "Sequence" + + def add_op(self, node_str): + node = server_sdk.DAGNode() + google.protobuf.text_format.Parse(node_str, node) + self.workflow.nodes.extend([node]) + + def get_op_graph(self): + workflow_conf = server_sdk.WorkflowConf() + workflow_conf.workflows.extend([self.workflow]) + return workflow_conf diff --git a/python/paddle_serving_server/monitor.py b/python/paddle_serving_server/monitor.py index 84146039c..4e8569836 100644 --- a/python/paddle_serving_server/monitor.py +++ b/python/paddle_serving_server/monitor.py @@ -28,7 +28,6 @@ _LOGGER = logging.getLogger(__name__) - class Monitor(object): ''' Monitor base class. It is used to monitor the remote model, pull and update the local model. 
diff --git a/python/paddle_serving_server/rpc_service.py b/python/paddle_serving_server/rpc_service.py new file mode 100644 index 000000000..6f695e92e --- /dev/null +++ b/python/paddle_serving_server/rpc_service.py @@ -0,0 +1,160 @@ +import sys +import os + +import google.protobuf.text_format + +from .proto import general_model_config_pb2 as m_config +from .proto import multi_lang_general_model_service_pb2 +sys.path.append( + os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) +from .proto import multi_lang_general_model_service_pb2_grpc + +class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. + MultiLangGeneralModelServiceServicer): + def __init__(self, model_config_path, is_multi_model, endpoints): + self.is_multi_model_ = is_multi_model + self.model_config_path_ = model_config_path + self.endpoints_ = endpoints + with open(self.model_config_path_) as f: + self.model_config_str_ = str(f.read()) + self._parse_model_config(self.model_config_str_) + self._init_bclient(self.model_config_path_, self.endpoints_) + + def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): + from paddle_serving_client import Client + self.bclient_ = Client() + if timeout_ms is not None: + self.bclient_.set_rpc_timeout_ms(timeout_ms) + self.bclient_.load_client_config(model_config_path) + self.bclient_.connect(endpoints) + + def _parse_model_config(self, model_config_str): + model_conf = m_config.GeneralModelConfig() + model_conf = google.protobuf.text_format.Merge(model_config_str, + model_conf) + self.feed_names_ = [var.alias_name for var in model_conf.feed_var] + self.feed_types_ = {} + self.feed_shapes_ = {} + self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] + self.fetch_types_ = {} + self.lod_tensor_set_ = set() + for i, var in enumerate(model_conf.feed_var): + self.feed_types_[var.alias_name] = var.feed_type + self.feed_shapes_[var.alias_name] = var.shape + if var.is_lod_tensor: + self.lod_tensor_set_.add(var.alias_name) + for i, var in enumerate(model_conf.fetch_var): + self.fetch_types_[var.alias_name] = var.fetch_type + if var.is_lod_tensor: + self.lod_tensor_set_.add(var.alias_name) + + def _flatten_list(self, nested_list): + for item in nested_list: + if isinstance(item, (list, tuple)): + for sub_item in self._flatten_list(item): + yield sub_item + else: + yield item + + def _unpack_inference_request(self, request): + feed_names = list(request.feed_var_names) + fetch_names = list(request.fetch_var_names) + is_python = request.is_python + log_id = request.log_id + feed_batch = [] + for feed_inst in request.insts: + feed_dict = {} + for idx, name in enumerate(feed_names): + var = feed_inst.tensor_array[idx] + v_type = self.feed_types_[name] + data = None + if is_python: + if v_type == 0: + data = np.frombuffer(var.data, dtype="int64") + elif v_type == 1: + data = np.frombuffer(var.data, dtype="float32") + elif v_type == 2: + data = np.frombuffer(var.data, dtype="int32") + else: + raise Exception("error type.") + else: + if v_type == 0: # int64 + data = np.array(list(var.int64_data), dtype="int64") + elif v_type == 1: # float32 + data = np.array(list(var.float_data), dtype="float32") + elif v_type == 2: + data = np.array(list(var.int_data), dtype="int32") + else: + raise Exception("error type.") + data.shape = list(feed_inst.tensor_array[idx].shape) + feed_dict[name] = data + if len(var.lod) > 0: + feed_dict["{}.lod".format(name)] = var.lod + feed_batch.append(feed_dict) + return feed_batch, fetch_names, is_python, log_id + + def 
_pack_inference_response(self, ret, fetch_names, is_python): + resp = multi_lang_general_model_service_pb2.InferenceResponse() + if ret is None: + resp.err_code = 1 + return resp + results, tag = ret + resp.tag = tag + resp.err_code = 0 + + if not self.is_multi_model_: + results = {'general_infer_0': results} + for model_name, model_result in results.items(): + model_output = multi_lang_general_model_service_pb2.ModelOutput() + inst = multi_lang_general_model_service_pb2.FetchInst() + for idx, name in enumerate(fetch_names): + tensor = multi_lang_general_model_service_pb2.Tensor() + v_type = self.fetch_types_[name] + if is_python: + tensor.data = model_result[name].tobytes() + else: + if v_type == 0: # int64 + tensor.int64_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 1: # float32 + tensor.float_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 2: # int32 + tensor.int_data.extend(model_result[name].reshape(-1) + .tolist()) + else: + raise Exception("error type.") + tensor.shape.extend(list(model_result[name].shape)) + if "{}.lod".format(name) in model_result: + tensor.lod.extend(model_result["{}.lod".format(name)] + .tolist()) + inst.tensor_array.append(tensor) + model_output.insts.append(inst) + model_output.engine_name = model_name + resp.outputs.append(model_output) + return resp + + def SetTimeout(self, request, context): + # This porcess and Inference process cannot be operate at the same time. + # For performance reasons, do not add thread lock temporarily. + timeout_ms = request.timeout_ms + self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) + resp = multi_lang_general_model_service_pb2.SimpleResponse() + resp.err_code = 0 + return resp + + def Inference(self, request, context): + feed_batch, fetch_names, is_python, log_id \ + = self._unpack_inference_request(request) + ret = self.bclient_.predict( + feed=feed_batch, + fetch=fetch_names, + batch=True, + need_variant_tag=True, + log_id=log_id) + return self._pack_inference_response(ret, fetch_names, is_python) + + def GetClientConfig(self, request, context): + resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() + resp.client_config_str = self.model_config_str_ + return resp \ No newline at end of file diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 684af801c..cdb10aa3d 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -18,12 +18,12 @@ python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 """ import argparse -import sys +import os import json import base64 import time -from multiprocessing import Process -from .web_service import WebService, port_is_available +from multiprocessing import Pool, Process +from paddle_serving_server_gpu import serve_args from flask import Flask, request import sys if sys.version_info.major == 2: @@ -31,24 +31,26 @@ elif sys.version_info.major == 3: from http.server import BaseHTTPRequestHandler, HTTPServer - -def parse_args(): # pylint: disable=doc-string-missing +def serve_args(): parser = argparse.ArgumentParser("serve") parser.add_argument( - "--thread", type=int, default=10, help="Concurrency of server") + "--thread", type=int, default=2, help="Concurrency of server") parser.add_argument( - "--model", type=str, default="", help="Model for serving") + "--port", type=int, default=9292, help="Port of the starting gpu") parser.add_argument( - "--port", type=int, default=9292, help="Port the 
server") + "--device", type=str, default="gpu", help="Type of device") + parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids") parser.add_argument( - "--name", type=str, default="None", help="Web service name") + "--model", type=str, default="", help="Model for serving") parser.add_argument( "--workdir", type=str, default="workdir", help="Working dir of current service") parser.add_argument( - "--device", type=str, default="cpu", help="Type of device") + "--name", type=str, default="None", help="Default service name") + parser.add_argument( + "--use_mkl", default=False, action="store_true", help="Use MKL") parser.add_argument( "--mem_optim_off", default=False, @@ -56,8 +58,6 @@ def parse_args(): # pylint: disable=doc-string-missing help="Memory optimize") parser.add_argument( "--ir_optim", default=False, action="store_true", help="Graph optimize") - parser.add_argument( - "--use_mkl", default=False, action="store_true", help="Use MKL") parser.add_argument( "--max_body_size", type=int, @@ -73,6 +73,12 @@ def parse_args(): # pylint: disable=doc-string-missing default=False, action="store_true", help="Use Multi-language-service") + parser.add_argument( + "--use_trt", default=False, action="store_true", help="Use TensorRT") + parser.add_argument( + "--use_lite", default=False, action="store_true", help="Use PaddleLite") + parser.add_argument( + "--use_xpu", default=False, action="store_true", help="Use XPU") parser.add_argument( "--product_name", type=str, @@ -85,26 +91,29 @@ def parse_args(): # pylint: disable=doc-string-missing help="container_id for authentication") return parser.parse_args() - -def start_standard_model(serving_port): # pylint: disable=doc-string-missing - args = parse_args() +def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-string-missing + workdir = args.workdir + gpuid = int(gpuid) + device = "gpu" + if gpuid == -1: + device = "cpu" + elif gpuid >= 0: + port = port + index thread_num = args.thread model = args.model - port = serving_port - workdir = args.workdir - device = args.device mem_optim = args.mem_optim_off is False ir_optim = args.ir_optim - max_body_size = args.max_body_size use_mkl = args.use_mkl - use_encryption_model = args.use_encryption_model + max_body_size = args.max_body_size use_multilang = args.use_multilang + if gpuid >= 0: + workdir = "{}_{}".format(args.workdir, gpuid) if model == "": print("You must specify your serving model") exit(-1) - import paddle_serving_server as serving + import paddle_serving_server_gpu as serving op_maker = serving.OpMaker() read_op = op_maker.create('general_reader') general_infer_op = op_maker.create('general_infer') @@ -115,29 +124,84 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_response_op) - server = None if use_multilang: server = serving.MultiLangServer() else: server = serving.Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) + server.use_mkl(use_mkl) server.set_memory_optimize(mem_optim) server.set_ir_optimize(ir_optim) - server.use_mkl(use_mkl) server.set_max_body_size(max_body_size) - server.set_port(port) - server.use_encryption_model(use_encryption_model) + if args.use_trt: + server.set_trt() + + if args.use_lite: + server.set_lite() + device = "arm" + + server.set_device(device) + if args.use_xpu: + server.set_xpu() + if args.product_name != None: server.set_product_name(args.product_name) if args.container_id != 
None: server.set_container_id(args.container_id) server.load_model_config(model) - server.prepare_server(workdir=workdir, port=port, device=device) + server.prepare_server( + workdir=workdir, + port=port, + device=device, + use_encryption_model=args.use_encryption_model) + if gpuid >= 0: + server.set_gpuid(gpuid) server.run_server() +def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing + gpus = "" + if serving_port == None: + serving_port = args.port + if args.gpu_ids == "": + gpus = [] + else: + gpus = args.gpu_ids.split(",") + if "CUDA_VISIBLE_DEVICES" in os.environ: + env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",") + for ids in gpus: + if int(ids) >= len(env_gpus): + print( + " Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}." + .format(len(env_gpus))) + exit(-1) + else: + env_gpus = [] + if args.use_lite: + print("run arm server.") + start_gpu_card_model(-1, -1, args) + elif len(gpus) <= 0: + print("gpu_ids not set, going to run cpu service.") + start_gpu_card_model(-1, -1, serving_port, args) + else: + gpu_processes = [] + for i, gpu_id in enumerate(gpus): + p = Process( + target=start_gpu_card_model, + args=( + i, + gpu_id, + serving_port, + args, )) + gpu_processes.append(p) + for p in gpu_processes: + p.start() + for p in gpu_processes: + p.join() + + class MainService(BaseHTTPRequestHandler): def get_available_port(self): default_port = 12000 @@ -146,7 +210,7 @@ def get_available_port(self): return default_port + i def start_serving(self): - start_standard_model(serving_port) + start_multi_card(args, serving_port) def get_key(self, post_data): if "key" not in post_data: @@ -207,9 +271,9 @@ def do_POST(self): if __name__ == "__main__": - - args = parse_args() + args = serve_args() if args.name == "None": + from .web_service import port_is_available if args.use_encryption_model: p_flag = False p = None @@ -220,27 +284,39 @@ def do_POST(self): ) server.serve_forever() else: - start_standard_model(args.port) + start_multi_card(args) else: - service = WebService(name=args.name) - service.load_model_config(args.model) - service.prepare_server( - workdir=args.workdir, port=args.port, device=args.device) - service.run_rpc_service() + from .web_service import WebService + web_service = WebService(name=args.name) + web_service.load_model_config(args.model) + gpu_ids = args.gpu_ids + if gpu_ids == "": + if "CUDA_VISIBLE_DEVICES" in os.environ: + gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] + if len(gpu_ids) > 0: + web_service.set_gpus(gpu_ids) + web_service.prepare_server( + workdir=args.workdir, + port=args.port, + device=args.device, + use_lite=args.use_lite, + use_xpu=args.use_xpu, + ir_optim=args.ir_optim) + web_service.run_rpc_service() app_instance = Flask(__name__) @app_instance.before_first_request def init(): - service._launch_web_service() + web_service._launch_web_service() - service_name = "/" + service.name + "/prediction" + service_name = "/" + web_service.name + "/prediction" @app_instance.route(service_name, methods=["POST"]) def run(): - return service.get_prediction(request) + return web_service.get_prediction(request) app_instance.run(host="0.0.0.0", - port=service.port, + port=web_service.port, threaded=False, processes=4) diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server/server.py similarity index 61% rename from python/paddle_serving_server_gpu/__init__.py rename to python/paddle_serving_server/server.py index 69dcf96de..38fe5117c 100644 --- 
a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server/server.py @@ -1,22 +1,5 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# pylint: disable=doc-string-missing import os -from .proto import server_configure_pb2 as server_sdk -from .proto import general_model_config_pb2 as m_config -import google.protobuf.text_format import tarfile import socket import paddle_serving_server_gpu as paddle_serving_server @@ -24,175 +7,18 @@ from .version import serving_server_version from contextlib import closing import argparse -import collections + import sys if sys.platform.startswith('win') is False: import fcntl import shutil import numpy as np import grpc -from .proto import multi_lang_general_model_service_pb2 import sys -sys.path.append( - os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) -from .proto import multi_lang_general_model_service_pb2_grpc + from multiprocessing import Pool, Process from concurrent import futures - -def serve_args(): - parser = argparse.ArgumentParser("serve") - parser.add_argument( - "--thread", type=int, default=2, help="Concurrency of server") - parser.add_argument( - "--model", type=str, default="", help="Model for serving") - parser.add_argument( - "--port", type=int, default=9292, help="Port of the starting gpu") - parser.add_argument( - "--workdir", - type=str, - default="workdir", - help="Working dir of current service") - parser.add_argument( - "--device", type=str, default="gpu", help="Type of device") - parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids") - parser.add_argument( - "--name", type=str, default="None", help="Default service name") - parser.add_argument( - "--mem_optim_off", - default=False, - action="store_true", - help="Memory optimize") - parser.add_argument( - "--ir_optim", default=False, action="store_true", help="Graph optimize") - parser.add_argument( - "--max_body_size", - type=int, - default=512 * 1024 * 1024, - help="Limit sizes of messages") - parser.add_argument( - "--use_encryption_model", - default=False, - action="store_true", - help="Use encryption model") - parser.add_argument( - "--use_multilang", - default=False, - action="store_true", - help="Use Multi-language-service") - parser.add_argument( - "--use_trt", default=False, action="store_true", help="Use TensorRT") - parser.add_argument( - "--use_lite", default=False, action="store_true", help="Use PaddleLite") - parser.add_argument( - "--use_xpu", default=False, action="store_true", help="Use XPU") - parser.add_argument( - "--product_name", - type=str, - default=None, - help="product_name for authentication") - parser.add_argument( - "--container_id", - type=str, - default=None, - help="container_id for authentication") - return parser.parse_args() - - -class OpMaker(object): - def __init__(self): - self.op_dict = { - "general_infer": "GeneralInferOp", - "general_reader": "GeneralReaderOp", - "general_response": "GeneralResponseOp", - 
"general_text_reader": "GeneralTextReaderOp", - "general_text_response": "GeneralTextResponseOp", - "general_single_kv": "GeneralSingleKVOp", - "general_dist_kv_infer": "GeneralDistKVInferOp", - "general_dist_kv": "GeneralDistKVOp" - } - self.node_name_suffix_ = collections.defaultdict(int) - - def create(self, node_type, engine_name=None, inputs=[], outputs=[]): - if node_type not in self.op_dict: - raise Exception("Op type {} is not supported right now".format( - node_type)) - node = server_sdk.DAGNode() - # node.name will be used as the infer engine name - if engine_name: - node.name = engine_name - else: - node.name = '{}_{}'.format(node_type, - self.node_name_suffix_[node_type]) - self.node_name_suffix_[node_type] += 1 - - node.type = self.op_dict[node_type] - if inputs: - for dep_node_str in inputs: - dep_node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(dep_node_str, dep_node) - dep = server_sdk.DAGNodeDependency() - dep.name = dep_node.name - dep.mode = "RO" - node.dependencies.extend([dep]) - # Because the return value will be used as the key value of the - # dict, and the proto object is variable which cannot be hashed, - # so it is processed into a string. This has little effect on - # overall efficiency. - return google.protobuf.text_format.MessageToString(node) - - -class OpSeqMaker(object): - def __init__(self): - self.workflow = server_sdk.Workflow() - self.workflow.name = "workflow1" - self.workflow.workflow_type = "Sequence" - - def add_op(self, node_str): - node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(node_str, node) - if len(node.dependencies) > 1: - raise Exception( - 'Set more than one predecessor for op in OpSeqMaker is not allowed.' - ) - if len(self.workflow.nodes) >= 1: - if len(node.dependencies) == 0: - dep = server_sdk.DAGNodeDependency() - dep.name = self.workflow.nodes[-1].name - dep.mode = "RO" - node.dependencies.extend([dep]) - elif len(node.dependencies) == 1: - if node.dependencies[0].name != self.workflow.nodes[-1].name: - raise Exception( - 'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.' 
- .format(node.dependencies[0].name, self.workflow.nodes[ - -1].name)) - self.workflow.nodes.extend([node]) - - def get_op_sequence(self): - workflow_conf = server_sdk.WorkflowConf() - workflow_conf.workflows.extend([self.workflow]) - return workflow_conf - - -class OpGraphMaker(object): - def __init__(self): - self.workflow = server_sdk.Workflow() - self.workflow.name = "workflow1" - # Currently, SDK only supports "Sequence" - self.workflow.workflow_type = "Sequence" - - def add_op(self, node_str): - node = server_sdk.DAGNode() - google.protobuf.text_format.Parse(node_str, node) - self.workflow.nodes.extend([node]) - - def get_op_graph(self): - workflow_conf = server_sdk.WorkflowConf() - workflow_conf.workflows.extend([self.workflow]) - return workflow_conf - - class Server(object): def __init__(self): self.server_handle_ = None @@ -217,6 +43,7 @@ def __init__(self): self.module_path = os.path.dirname(paddle_serving_server.__file__) self.cur_path = os.getcwd() self.use_local_bin = False + self.mkl_flag = False self.device = "cpu" self.gpuid = 0 self.use_trt = False @@ -431,6 +258,29 @@ def load_model_config(self, model_config_paths): str(f.read()), self.model_conf) # check config here # print config here + + def use_mkl(self, flag): + self.mkl_flag = flag + + def get_device_version(self): + avx_flag = False + mkl_flag = self.mkl_flag + openblas_flag = False + r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1") + if r == 0: + avx_flag = True + if avx_flag: + if mkl_flag: + device_version = "serving-cpu-avx-mkl-" + else: + device_version = "serving-cpu-avx-openblas-" + else: + if mkl_flag: + print( + "Your CPU does not support AVX, server will running with noavx-openblas mode." + ) + device_version = "serving-cpu-noavx-openblas-" + return device_version def download_bin(self): os.chdir(self.module_path) @@ -494,7 +344,8 @@ def download_bin(self): version_file.close() os.chdir(self.cur_path) self.bin_path = self.server_path + "/serving" - + + def prepare_server(self, workdir=None, port=9292, @@ -613,158 +464,6 @@ def run_server(self): os.system(command) - -class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. 
- MultiLangGeneralModelServiceServicer): - def __init__(self, model_config_path, is_multi_model, endpoints): - self.is_multi_model_ = is_multi_model - self.model_config_path_ = model_config_path - self.endpoints_ = endpoints - with open(self.model_config_path_) as f: - self.model_config_str_ = str(f.read()) - self._parse_model_config(self.model_config_str_) - self._init_bclient(self.model_config_path_, self.endpoints_) - - def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): - from paddle_serving_client import Client - self.bclient_ = Client() - if timeout_ms is not None: - self.bclient_.set_rpc_timeout_ms(timeout_ms) - self.bclient_.load_client_config(model_config_path) - self.bclient_.connect(endpoints) - - def _parse_model_config(self, model_config_str): - model_conf = m_config.GeneralModelConfig() - model_conf = google.protobuf.text_format.Merge(model_config_str, - model_conf) - self.feed_names_ = [var.alias_name for var in model_conf.feed_var] - self.feed_types_ = {} - self.feed_shapes_ = {} - self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] - self.fetch_types_ = {} - self.lod_tensor_set_ = set() - for i, var in enumerate(model_conf.feed_var): - self.feed_types_[var.alias_name] = var.feed_type - self.feed_shapes_[var.alias_name] = var.shape - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - for i, var in enumerate(model_conf.fetch_var): - self.fetch_types_[var.alias_name] = var.fetch_type - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - - def _flatten_list(self, nested_list): - for item in nested_list: - if isinstance(item, (list, tuple)): - for sub_item in self._flatten_list(item): - yield sub_item - else: - yield item - - def _unpack_inference_request(self, request): - feed_names = list(request.feed_var_names) - fetch_names = list(request.fetch_var_names) - is_python = request.is_python - log_id = request.log_id - feed_batch = [] - for feed_inst in request.insts: - feed_dict = {} - for idx, name in enumerate(feed_names): - var = feed_inst.tensor_array[idx] - v_type = self.feed_types_[name] - data = None - if is_python: - if v_type == 0: - data = np.frombuffer(var.data, dtype="int64") - elif v_type == 1: - data = np.frombuffer(var.data, dtype="float32") - elif v_type == 2: - data = np.frombuffer(var.data, dtype="int32") - else: - raise Exception("error type.") - else: - if v_type == 0: # int64 - data = np.array(list(var.int64_data), dtype="int64") - elif v_type == 1: # float32 - data = np.array(list(var.float_data), dtype="float32") - elif v_type == 2: - data = np.array(list(var.int_data), dtype="int32") - else: - raise Exception("error type.") - data.shape = list(feed_inst.tensor_array[idx].shape) - feed_dict[name] = data - if len(var.lod) > 0: - feed_dict["{}.lod".format(name)] = var.lod - feed_batch.append(feed_dict) - return feed_batch, fetch_names, is_python, log_id - - def _pack_inference_response(self, ret, fetch_names, is_python): - resp = multi_lang_general_model_service_pb2.InferenceResponse() - if ret is None: - resp.err_code = 1 - return resp - results, tag = ret - resp.tag = tag - resp.err_code = 0 - - if not self.is_multi_model_: - results = {'general_infer_0': results} - for model_name, model_result in results.items(): - model_output = multi_lang_general_model_service_pb2.ModelOutput() - inst = multi_lang_general_model_service_pb2.FetchInst() - for idx, name in enumerate(fetch_names): - tensor = multi_lang_general_model_service_pb2.Tensor() - v_type = self.fetch_types_[name] - if is_python: - 
tensor.data = model_result[name].tobytes() - else: - if v_type == 0: # int64 - tensor.int64_data.extend(model_result[name].reshape(-1) - .tolist()) - elif v_type == 1: # float32 - tensor.float_data.extend(model_result[name].reshape(-1) - .tolist()) - elif v_type == 2: # int32 - tensor.int_data.extend(model_result[name].reshape(-1) - .tolist()) - else: - raise Exception("error type.") - tensor.shape.extend(list(model_result[name].shape)) - if "{}.lod".format(name) in model_result: - tensor.lod.extend(model_result["{}.lod".format(name)] - .tolist()) - inst.tensor_array.append(tensor) - model_output.insts.append(inst) - model_output.engine_name = model_name - resp.outputs.append(model_output) - return resp - - def SetTimeout(self, request, context): - # This porcess and Inference process cannot be operate at the same time. - # For performance reasons, do not add thread lock temporarily. - timeout_ms = request.timeout_ms - self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) - resp = multi_lang_general_model_service_pb2.SimpleResponse() - resp.err_code = 0 - return resp - - def Inference(self, request, context): - feed_batch, fetch_names, is_python, log_id \ - = self._unpack_inference_request(request) - ret = self.bclient_.predict( - feed=feed_batch, - fetch=fetch_names, - batch=True, - need_variant_tag=True, - log_id=log_id) - return self._pack_inference_response(ret, fetch_names, is_python) - - def GetClientConfig(self, request, context): - resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() - resp.client_config_str = self.model_config_str_ - return resp - - class MultiLangServer(object): def __init__(self): self.bserver_ = Server() @@ -807,7 +506,10 @@ def set_op_sequence(self, op_seq): def set_op_graph(self, op_graph): self.bserver_.set_op_graph(op_graph) - + + def use_mkl(self, flag): + self.bserver_.use_mkl(flag) + def set_memory_optimize(self, flag=False): self.bserver_.set_memory_optimize(flag) @@ -878,4 +580,4 @@ def run_server(self): server.add_insecure_port('[::]:{}'.format(self.gport_)) server.start() p_bserver.join() - server.wait_for_termination() + server.wait_for_termination() \ No newline at end of file diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py deleted file mode 100644 index 490ba962a..000000000 --- a/python/paddle_serving_server/version.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" Paddle Serving Client version string """ -serving_client_version = "0.0.0" -serving_server_version = "0.0.0" -module_proto_version = "0.0.0" -commit_id = "" diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index 3be818f0e..2332b9ba2 100644 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -15,15 +15,18 @@ # pylint: disable=doc-string-missing from flask import Flask, request, abort -from multiprocessing import Pool, Process -from paddle_serving_server import OpMaker, OpSeqMaker, Server -from paddle_serving_client import Client from contextlib import closing +from multiprocessing import Pool, Process, Queue +from paddle_serving_client import Client +from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server +from paddle_serving_server_gpu.serve import start_multi_card import socket +import sys import numpy as np -from paddle_serving_server import pipeline -from paddle_serving_server.pipeline import Op +import paddle_serving_server_gpu as serving +from paddle_serving_server_gpu import pipeline +from paddle_serving_server_gpu.pipeline import Op def port_is_available(port): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: @@ -34,13 +37,15 @@ def port_is_available(port): else: return False - class WebService(object): def __init__(self, name="default_service"): self.name = name # pipeline self._server = pipeline.PipelineServer(self.name) + self.gpus = [] # deprecated + self.rpc_service_list = [] # deprecated + def get_pipeline_response(self, read_op): return None @@ -77,58 +82,115 @@ def load_model_config(self, model_config): self.feed_vars = {var.name: var for var in model_conf.feed_var} self.fetch_vars = {var.name: var for var in model_conf.fetch_var} - def _launch_rpc_service(self): - op_maker = OpMaker() + def set_gpus(self, gpus): + print("This API will be deprecated later. 
Please do not use it") + self.gpus = [int(x) for x in gpus.split(",")] + + def default_rpc_service(self, + workdir="conf", + port=9292, + gpuid=0, + thread_num=2, + mem_optim=True, + use_lite=False, + use_xpu=False, + ir_optim=False): + device = "gpu" + if gpuid == -1: + if use_lite: + device = "arm" + else: + device = "cpu" + op_maker = serving.OpMaker() read_op = op_maker.create('general_reader') general_infer_op = op_maker.create('general_infer') general_response_op = op_maker.create('general_response') + op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_response_op) + server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) - server.set_num_threads(16) - server.set_memory_optimize(self.mem_optim) - server.set_ir_optimize(self.ir_optim) + server.set_num_threads(thread_num) + server.set_memory_optimize(mem_optim) + server.set_ir_optimize(ir_optim) + server.set_device(device) + + if use_lite: + server.set_lite() + if use_xpu: + server.set_xpu() + server.load_model_config(self.model_config) - server.prepare_server( - workdir=self.workdir, port=self.port_list[0], device=self.device) - server.run_server() - - def port_is_available(self, port): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex(('0.0.0.0', port)) - if result != 0: - return True - else: - return False + if gpuid >= 0: + server.set_gpuid(gpuid) + server.prepare_server(workdir=workdir, port=port, device=device) + return server + + def _launch_rpc_service(self, service_idx): + self.rpc_service_list[service_idx].run_server() def prepare_server(self, workdir="", port=9393, - device="cpu", - mem_optim=True, - ir_optim=False): + device="gpu", + use_lite=False, + use_xpu=False, + ir_optim=False, + gpuid=0, + mem_optim=True): print("This API will be deprecated later. 
Please do not use it") self.workdir = workdir self.port = port self.device = device - default_port = 12000 + self.gpuid = gpuid self.port_list = [] - self.mem_optim = mem_optim - self.ir_optim = ir_optim + default_port = 12000 for i in range(1000): if port_is_available(default_port + i): self.port_list.append(default_port + i) + if len(self.port_list) > len(self.gpus): break + if len(self.gpus) == 0: + # init cpu service + self.rpc_service_list.append( + self.default_rpc_service( + self.workdir, + self.port_list[0], + -1, + thread_num=2, + mem_optim=mem_optim, + use_lite=use_lite, + use_xpu=use_xpu, + ir_optim=ir_optim)) + else: + for i, gpuid in enumerate(self.gpus): + self.rpc_service_list.append( + self.default_rpc_service( + "{}_{}".format(self.workdir, i), + self.port_list[i], + gpuid, + thread_num=2, + mem_optim=mem_optim, + use_lite=use_lite, + use_xpu=use_xpu, + ir_optim=ir_optim)) + def _launch_web_service(self): + gpu_num = len(self.gpus) self.client = Client() self.client.load_client_config("{}/serving_server_conf.prototxt".format( self.model_config)) - self.client.connect(["0.0.0.0:{}".format(self.port_list[0])]) + endpoints = "" + if gpu_num > 0: + for i in range(gpu_num): + endpoints += "127.0.0.1:{},".format(self.port_list[i]) + else: + endpoints = "127.0.0.1:{}".format(self.port_list[0]) + self.client.connect([endpoints]) def get_prediction(self, request): if not request.json: @@ -158,8 +220,12 @@ def run_rpc_service(self): print("web service address:") print("http://{}:{}/{}/prediction".format(localIP, self.port, self.name)) - p_rpc = Process(target=self._launch_rpc_service) - p_rpc.start() + server_pros = [] + for i, service in enumerate(self.rpc_service_list): + p = Process(target=self._launch_rpc_service, args=(i, )) + server_pros.append(p) + for p in server_pros: + p.start() app_instance = Flask(__name__) @@ -175,7 +241,9 @@ def run(): self.app_instance = app_instance - def run_debugger_service(self): + # TODO: maybe change another API name: maybe run_local_predictor? + def run_debugger_service(self, gpu=False): + print("This API will be deprecated later. Please do not use it") import socket localIP = socket.gethostbyname(socket.gethostname()) print("web service address:") @@ -185,7 +253,7 @@ def run_debugger_service(self): @app_instance.before_first_request def init(): - self._launch_local_predictor() + self._launch_local_predictor(gpu) service_name = "/" + self.name + "/prediction" @@ -195,11 +263,11 @@ def run(): self.app_instance = app_instance - def _launch_local_predictor(self): + def _launch_local_predictor(self, gpu): from paddle_serving_app.local_predict import LocalPredictor self.client = LocalPredictor() self.client.load_model_config( - "{}".format(self.model_config), use_gpu=False) + "{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0]) def run_web_service(self): print("This API will be deprecated later. Please do not use it") diff --git a/python/paddle_serving_server_gpu/monitor.py b/python/paddle_serving_server_gpu/monitor.py deleted file mode 100644 index 84146039c..000000000 --- a/python/paddle_serving_server_gpu/monitor.py +++ /dev/null @@ -1,504 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Usage: - Start monitor with one line command - Example: - python -m paddle_serving_server.monitor -""" -import os -import time -import argparse -import subprocess -import datetime -import shutil -import tarfile -import logging - -_LOGGER = logging.getLogger(__name__) - - -class Monitor(object): - ''' - Monitor base class. It is used to monitor the remote model, pull and update the local model. - ''' - - def __init__(self, interval): - self._remote_path = None - self._remote_model_name = None - self._remote_donefile_name = None - self._local_path = None - self._local_model_name = None - self._local_timestamp_file = None - self._interval = interval - self._remote_donefile_timestamp = None - self._local_tmp_path = None - self._unpacked_filename = None - - def set_remote_path(self, remote_path): - self._remote_path = remote_path - - def set_remote_model_name(self, model_name): - self._remote_model_name = model_name - - def set_remote_donefile_name(self, donefile_name): - self._remote_donefile_name = donefile_name - - def set_local_path(self, local_path): - self._local_path = local_path - - def set_local_model_name(self, model_name): - self._local_model_name = model_name - - def set_local_timestamp_file(self, timestamp_file): - self._local_timestamp_file = timestamp_file - - def set_local_tmp_path(self, tmp_path): - self._local_tmp_path = tmp_path - - def set_unpacked_filename(self, unpacked_filename): - self._unpacked_filename = unpacked_filename - - def _check_param_help(self, param_name, param_value): - return "Please check the {}({}) parameter.".format(param_name, - param_value) - - def _check_params(self, params): - for param in params: - if getattr(self, param, None) is None: - raise Exception('{} not set.'.format(param)) - - def _print_params(self, params_name): - self._check_params(params_name) - for name in params_name: - _LOGGER.info('{}: {}'.format(name, getattr(self, name))) - - def _decompress_model_file(self, local_tmp_path, model_name, - unpacked_filename): - if unpacked_filename is None: - _LOGGER.debug('remote file({}) is already unpacked.'.format( - model_name)) - return model_name - tar_model_path = os.path.join(local_tmp_path, model_name) - _LOGGER.info("try to unpack remote file({})".format(tar_model_path)) - if not tarfile.is_tarfile(tar_model_path): - raise Exception('not a tar packaged file type. {}'.format( - self._check_param_help('remote_model_name', model_name))) - try: - _LOGGER.info('unpack remote file({}).'.format(model_name)) - tar = tarfile.open(tar_model_path) - tar.extractall(local_tmp_path) - tar.close() - except: - raise Exception( - 'Decompressing failed, maybe no disk space left. {}'.foemat( - self._check_param_help('local_tmp_path', local_tmp_path))) - finally: - os.remove(tar_model_path) - _LOGGER.debug('remove packed file({}).'.format(tar_model_path)) - _LOGGER.info('using unpacked filename: {}.'.format( - unpacked_filename)) - if not os.path.exists( - os.path.join(local_tmp_path, unpacked_filename)): - raise Exception('file not exist. 
{}'.format( - self._check_param_help('unpacked_filename', - unpacked_filename))) - return unpacked_filename - - def run(self): - ''' - Monitor the remote model by polling and update the local model. - ''' - params = [ - '_remote_path', '_remote_model_name', '_remote_donefile_name', - '_local_model_name', '_local_path', '_local_timestamp_file', - '_local_tmp_path', '_interval' - ] - self._print_params(params) - local_tmp_path = os.path.join(self._local_path, self._local_tmp_path) - _LOGGER.info('local_tmp_path: {}'.format(local_tmp_path)) - if not os.path.exists(local_tmp_path): - _LOGGER.info('mkdir: {}'.format(local_tmp_path)) - os.makedirs(local_tmp_path) - while True: - [flag, timestamp] = self._exist_remote_file( - self._remote_path, self._remote_donefile_name, local_tmp_path) - if flag: - if self._remote_donefile_timestamp is None or \ - timestamp != self._remote_donefile_timestamp: - _LOGGER.info('doneilfe({}) changed.'.format( - self._remote_donefile_name)) - self._remote_donefile_timestamp = timestamp - self._pull_remote_dir(self._remote_path, - self._remote_model_name, - local_tmp_path) - _LOGGER.info('pull remote model({}).'.format( - self._remote_model_name)) - unpacked_filename = self._decompress_model_file( - local_tmp_path, self._remote_model_name, - self._unpacked_filename) - self._update_local_model(local_tmp_path, unpacked_filename, - self._local_path, - self._local_model_name) - _LOGGER.info('update local model({}).'.format( - self._local_model_name)) - self._update_local_donefile(self._local_path, - self._local_model_name, - self._local_timestamp_file) - _LOGGER.info('update model timestamp({}).'.format( - self._local_timestamp_file)) - else: - _LOGGER.info('remote({}) has no donefile.'.format( - self._remote_path)) - _LOGGER.info('sleep {}s.'.format(self._interval)) - time.sleep(self._interval) - - def _exist_remote_file(self, path, filename, local_tmp_path): - raise Exception('This function must be inherited.') - - def _pull_remote_dir(self, remote_path, dirname, local_tmp_path): - raise Exception('This function must be inherited.') - - def _update_local_model(self, local_tmp_path, remote_model_name, local_path, - local_model_name): - tmp_model_path = os.path.join(local_tmp_path, remote_model_name) - local_model_path = os.path.join(local_path, local_model_name) - cmd = 'cp -r {}/* {}'.format(tmp_model_path, local_model_path) - _LOGGER.debug('update model cmd: {}'.format(cmd)) - if os.system(cmd) != 0: - raise Exception('update local model failed.') - - def _update_local_donefile(self, local_path, local_model_name, - local_timestamp_file): - donefile_path = os.path.join(local_path, local_model_name, - local_timestamp_file) - cmd = 'touch {}'.format(donefile_path) - _LOGGER.debug('update timestamp cmd: {}'.format(cmd)) - if os.system(cmd) != 0: - raise Exception('update local donefile failed.') - - -class HadoopMonitor(Monitor): - ''' Monitor HDFS or AFS by Hadoop-client. 
''' - - def __init__(self, hadoop_bin, fs_name='', fs_ugi='', interval=10): - super(HadoopMonitor, self).__init__(interval) - self._hadoop_bin = hadoop_bin - self._fs_name = fs_name - self._fs_ugi = fs_ugi - self._print_params(['_hadoop_bin', '_fs_name', '_fs_ugi']) - self._cmd_prefix = '{} fs '.format(self._hadoop_bin) - if self._fs_name: - self._cmd_prefix += '-D fs.default.name={} '.format(self._fs_name) - if self._fs_ugi: - self._cmd_prefix += '-D hadoop.job.ugi={} '.format(self._fs_ugi) - _LOGGER.info('Hadoop prefix cmd: {}'.format(self._cmd_prefix)) - - def _exist_remote_file(self, path, filename, local_tmp_path): - remote_filepath = os.path.join(path, filename) - cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath) - _LOGGER.debug('check cmd: {}'.format(cmd)) - [status, output] = subprocess.getstatusoutput(cmd) - _LOGGER.debug('resp: {}'.format(output)) - if status == 0: - [_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split() - timestr = mdate + mtime - return [True, timestr] - else: - return [False, None] - - def _pull_remote_dir(self, remote_path, dirname, local_tmp_path): - # remove old file before pull remote dir - local_dirpath = os.path.join(local_tmp_path, dirname) - if os.path.exists(local_dirpath): - _LOGGER.info('remove old temporary model file({}).'.format(dirname)) - if self._unpacked_filename is None: - # the remote file is model folder. - shutil.rmtree(local_dirpath) - else: - # the remote file is a packed model file - os.remove(local_dirpath) - remote_dirpath = os.path.join(remote_path, dirname) - cmd = '{} -get {} {} 2>/dev/null'.format(self._cmd_prefix, - remote_dirpath, local_dirpath) - _LOGGER.debug('pull cmd: {}'.format(cmd)) - if os.system(cmd) != 0: - raise Exception('pull remote dir failed. {}'.format( - self._check_param_help('remote_model_name', dirname))) - - -class FTPMonitor(Monitor): - ''' FTP Monitor. 
''' - - def __init__(self, host, port, username="", password="", interval=10): - super(FTPMonitor, self).__init__(interval) - import ftplib - self._ftp = ftplib.FTP() - self._ftp_host = host - self._ftp_port = port - self._ftp_username = username - self._ftp_password = password - self._ftp.connect(self._ftp_host, self._ftp_port) - self._ftp.login(self._ftp_username, self._ftp_password) - self._print_params( - ['_ftp_host', '_ftp_port', '_ftp_username', '_ftp_password']) - - def _exist_remote_file(self, path, filename, local_tmp_path): - import ftplib - try: - _LOGGER.debug('cwd: {}'.format(path)) - self._ftp.cwd(path) - timestamp = self._ftp.voidcmd('MDTM {}'.format(filename))[4:].strip( - ) - return [True, timestamp] - except ftplib.error_perm: - _LOGGER.debug('remote file({}) not exist.'.format(filename)) - return [False, None] - - def _download_remote_file(self, - remote_path, - remote_filename, - local_tmp_path, - overwrite=True): - local_fullpath = os.path.join(local_tmp_path, remote_filename) - if not overwrite and os.path.isfile(fullpath): - return - else: - with open(local_fullpath, 'wb') as f: - _LOGGER.debug('cwd: {}'.format(remote_path)) - self._ftp.cwd(remote_path) - _LOGGER.debug('download remote file({})'.format( - remote_filename)) - self._ftp.retrbinary('RETR {}'.format(remote_filename), f.write) - - def _download_remote_files(self, - remote_path, - remote_dirname, - local_tmp_path, - overwrite=True): - import ftplib - remote_dirpath = os.path.join(remote_path, remote_dirname) - # Check whether remote_dirpath is a file or a folder - try: - _LOGGER.debug('cwd: {}'.format(remote_dirpath)) - self._ftp.cwd(remote_dirpath) - _LOGGER.debug('{} is folder.'.format(remote_dirname)) - - local_dirpath = os.path.join(local_tmp_path, remote_dirname) - if not os.path.exists(local_dirpath): - _LOGGER.info('mkdir: {}'.format(local_dirpath)) - os.mkdir(local_dirpath) - - output = [] - self._ftp.dir(output.append) - for line in output: - [attr, _, _, _, _, _, _, _, name] = line.split() - if attr[0] == 'd': - self._download_remote_files( - os.path.join(remote_path, remote_dirname), name, - os.path.join(local_tmp_path, remote_dirname), overwrite) - else: - self._download_remote_file(remote_dirpath, name, - local_dirpath, overwrite) - except ftplib.error_perm: - _LOGGER.debug('{} is file.'.format(remote_dirname)) - self._download_remote_file(remote_path, remote_dirname, - local_tmp_path, overwrite) - return - - def _pull_remote_dir(self, remote_path, dirname, local_tmp_path): - self._download_remote_files( - remote_path, dirname, local_tmp_path, overwrite=True) - - -class GeneralMonitor(Monitor): - ''' General Monitor. ''' - - def __init__(self, host, interval=10): - super(GeneralMonitor, self).__init__(interval) - self._general_host = host - self._print_params(['_general_host']) - - def _get_local_file_timestamp(self, filename): - return os.path.getmtime(filename) - - def _exist_remote_file(self, remote_path, filename, local_tmp_path): - remote_filepath = os.path.join(remote_path, filename) - url = '{}/{}'.format(self._general_host, remote_filepath) - _LOGGER.debug('remote file url: {}'.format(url)) - # only for check donefile, which is not a folder. 
- cmd = 'wget -nd -N -P {} {} &>/dev/null'.format(local_tmp_path, url) - _LOGGER.debug('wget cmd: {}'.format(cmd)) - if os.system(cmd) != 0: - _LOGGER.debug('remote file({}) not exist.'.format(remote_filepath)) - return [False, None] - else: - timestamp = self._get_local_file_timestamp( - os.path.join(local_tmp_path, filename)) - return [True, timestamp] - - def _pull_remote_dir(self, remote_path, dirname, local_tmp_path): - remote_dirpath = os.path.join(remote_path, dirname) - url = '{}/{}'.format(self._general_host, remote_dirpath) - _LOGGER.debug('remote file url: {}'.format(url)) - if self._unpacked_filename is None: - # the remote file is model folder. - cmd = 'wget -nH -r -P {} {} &>/dev/null'.format( - os.path.join(local_tmp_path, dirname), url) - else: - # the remote file is a packed model file - cmd = 'wget -nd -N -P {} {} &>/dev/null'.format(local_tmp_path, url) - _LOGGER.debug('wget cmd: {}'.format(cmd)) - if os.system(cmd) != 0: - raise Exception('pull remote dir failed. {}'.format( - self._check_param_help('remote_model_name', dirname))) - - -def parse_args(): - """ parse args. - - Returns: - parser.parse_args(). - """ - parser = argparse.ArgumentParser(description="Monitor") - parser.add_argument( - "--type", type=str, default='general', help="Type of remote server") - parser.add_argument( - "--remote_path", - type=str, - required=True, - help="The base path for the remote") - parser.add_argument( - "--remote_model_name", - type=str, - required=True, - help="The model name to be pulled from the remote") - parser.add_argument( - "--remote_donefile_name", - type=str, - required=True, - help="The donefile name that marks the completion of the remote model update" - ) - parser.add_argument( - "--local_path", type=str, required=True, help="Local work path") - parser.add_argument( - "--local_model_name", type=str, required=True, help="Local model name") - parser.add_argument( - "--local_timestamp_file", - type=str, - default='fluid_time_file', - help="The timestamp file used locally for hot loading, The file is considered to be placed in the `local_path/local_model_name` folder." - ) - parser.add_argument( - "--local_tmp_path", - type=str, - default='_serving_monitor_tmp', - help="The path of the folder where temporary files are stored locally. If it does not exist, it will be created automatically" - ) - parser.add_argument( - "--unpacked_filename", - type=str, - default=None, - help="If the model of the remote production is a packaged file, the unpacked file name should be set. Currently, only tar packaging format is supported." - ) - parser.add_argument( - "--interval", - type=int, - default=10, - help="The polling interval in seconds") - parser.add_argument( - "--debug", action='store_true', help="If set, output more details") - parser.set_defaults(debug=False) - # general monitor - parser.add_argument("--general_host", type=str, help="General remote host") - # ftp monitor - parser.add_argument("--ftp_host", type=str, help="FTP remote host") - parser.add_argument("--ftp_port", type=int, help="FTP remote port") - parser.add_argument( - "--ftp_username", - type=str, - default='', - help="FTP username. Not used if anonymous access.") - parser.add_argument( - "--ftp_password", - type=str, - default='', - help="FTP password. Not used if anonymous access") - # afs/hdfs monitor - parser.add_argument( - "--hadoop_bin", type=str, help="Path of Hadoop binary file") - parser.add_argument( - "--fs_name", - type=str, - default='', - help="AFS/HDFS fs_name. 
Not used if set in Hadoop-client.") - parser.add_argument( - "--fs_ugi", - type=str, - default='', - help="AFS/HDFS fs_ugi, Not used if set in Hadoop-client") - return parser.parse_args() - - -def get_monitor(mtype): - """ generator monitor instance. - - Args: - mtype: type of monitor - - Returns: - monitor instance. - """ - if mtype == 'ftp': - return FTPMonitor( - args.ftp_host, - args.ftp_port, - username=args.ftp_username, - password=args.ftp_password, - interval=args.interval) - elif mtype == 'general': - return GeneralMonitor(args.general_host, interval=args.interval) - elif mtype == 'afs' or mtype == 'hdfs': - return HadoopMonitor( - args.hadoop_bin, args.fs_name, args.fs_ugi, interval=args.interval) - else: - raise Exception('unsupport type.') - - -def start_monitor(monitor, args): - monitor.set_remote_path(args.remote_path) - monitor.set_remote_model_name(args.remote_model_name) - monitor.set_remote_donefile_name(args.remote_donefile_name) - monitor.set_local_path(args.local_path) - monitor.set_local_model_name(args.local_model_name) - monitor.set_local_timestamp_file(args.local_timestamp_file) - monitor.set_local_tmp_path(args.local_tmp_path) - monitor.set_unpacked_filename(args.unpacked_filename) - monitor.run() - - -if __name__ == "__main__": - args = parse_args() - if args.debug: - logging.basicConfig( - format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - datefmt='%Y-%m-%d %H:%M', - level=logging.DEBUG) - else: - logging.basicConfig( - format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - datefmt='%Y-%m-%d %H:%M', - level=logging.INFO) - monitor = get_monitor(args.type) - start_monitor(monitor, args) diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py deleted file mode 100644 index 13f081e72..000000000 --- a/python/paddle_serving_server_gpu/serve.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -Usage: - Host a trained paddle model with one line command - Example: - python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 -""" -import argparse -import os -import json -import base64 -import time -from multiprocessing import Pool, Process -from paddle_serving_server_gpu import serve_args -from flask import Flask, request -import sys -if sys.version_info.major == 2: - from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -elif sys.version_info.major == 3: - from http.server import BaseHTTPRequestHandler, HTTPServer - - -def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing - gpuid = int(gpuid) - device = "gpu" - if gpuid == -1: - device = "cpu" - elif gpuid >= 0: - port = port + index - thread_num = args.thread - model = args.model - mem_optim = args.mem_optim_off is False - ir_optim = args.ir_optim - max_body_size = args.max_body_size - use_multilang = args.use_multilang - workdir = args.workdir - if gpuid >= 0: - workdir = "{}_{}".format(args.workdir, gpuid) - - if model == "": - print("You must specify your serving model") - exit(-1) - - import paddle_serving_server_gpu as serving - op_maker = serving.OpMaker() - read_op = op_maker.create('general_reader') - general_infer_op = op_maker.create('general_infer') - general_response_op = op_maker.create('general_response') - - op_seq_maker = serving.OpSeqMaker() - op_seq_maker.add_op(read_op) - op_seq_maker.add_op(general_infer_op) - op_seq_maker.add_op(general_response_op) - - if use_multilang: - server = serving.MultiLangServer() - else: - server = serving.Server() - server.set_op_sequence(op_seq_maker.get_op_sequence()) - server.set_num_threads(thread_num) - server.set_memory_optimize(mem_optim) - server.set_ir_optimize(ir_optim) - server.set_max_body_size(max_body_size) - if args.use_trt: - server.set_trt() - - if args.use_lite: - server.set_lite() - device = "arm" - - server.set_device(device) - if args.use_xpu: - server.set_xpu() - - if args.product_name != None: - server.set_product_name(args.product_name) - if args.container_id != None: - server.set_container_id(args.container_id) - - server.load_model_config(model) - server.prepare_server( - workdir=workdir, - port=port, - device=device, - use_encryption_model=args.use_encryption_model) - if gpuid >= 0: - server.set_gpuid(gpuid) - server.run_server() - - -def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing - gpus = "" - if serving_port == None: - serving_port = args.port - if args.gpu_ids == "": - gpus = [] - else: - gpus = args.gpu_ids.split(",") - if "CUDA_VISIBLE_DEVICES" in os.environ: - env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",") - for ids in gpus: - if int(ids) >= len(env_gpus): - print( - " Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}." 
- .format(len(env_gpus))) - exit(-1) - else: - env_gpus = [] - if args.use_lite: - print("run arm server.") - start_gpu_card_model(-1, -1, args) - elif len(gpus) <= 0: - print("gpu_ids not set, going to run cpu service.") - start_gpu_card_model(-1, -1, serving_port, args) - else: - gpu_processes = [] - for i, gpu_id in enumerate(gpus): - p = Process( - target=start_gpu_card_model, - args=( - i, - gpu_id, - serving_port, - args, )) - gpu_processes.append(p) - for p in gpu_processes: - p.start() - for p in gpu_processes: - p.join() - - -class MainService(BaseHTTPRequestHandler): - def get_available_port(self): - default_port = 12000 - for i in range(1000): - if port_is_available(default_port + i): - return default_port + i - - def start_serving(self): - start_multi_card(args, serving_port) - - def get_key(self, post_data): - if "key" not in post_data: - return False - else: - key = base64.b64decode(post_data["key"].encode()) - with open(args.model + "/key", "wb") as f: - f.write(key) - return True - - def check_key(self, post_data): - if "key" not in post_data: - return False - else: - key = base64.b64decode(post_data["key"].encode()) - with open(args.model + "/key", "rb") as f: - cur_key = f.read() - return (key == cur_key) - - def start(self, post_data): - post_data = json.loads(post_data) - global p_flag - if not p_flag: - if args.use_encryption_model: - print("waiting key for model") - if not self.get_key(post_data): - print("not found key in request") - return False - global serving_port - global p - serving_port = self.get_available_port() - p = Process(target=self.start_serving) - p.start() - time.sleep(3) - if p.is_alive(): - p_flag = True - else: - return False - else: - if p.is_alive(): - if not self.check_key(post_data): - return False - else: - return False - return True - - def do_POST(self): - content_length = int(self.headers['Content-Length']) - post_data = self.rfile.read(content_length) - if self.start(post_data): - response = {"endpoint_list": [serving_port]} - else: - response = {"message": "start serving failed"} - self.send_response(200) - self.send_header('Content-type', 'application/json') - self.end_headers() - self.wfile.write(json.dumps(response).encode()) - - -if __name__ == "__main__": - args = serve_args() - if args.name == "None": - from .web_service import port_is_available - if args.use_encryption_model: - p_flag = False - p = None - serving_port = 0 - server = HTTPServer(('localhost', int(args.port)), MainService) - print( - 'Starting encryption server, waiting for key from client, use to stop' - ) - server.serve_forever() - else: - start_multi_card(args) - else: - from .web_service import WebService - web_service = WebService(name=args.name) - web_service.load_model_config(args.model) - gpu_ids = args.gpu_ids - if gpu_ids == "": - if "CUDA_VISIBLE_DEVICES" in os.environ: - gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] - if len(gpu_ids) > 0: - web_service.set_gpus(gpu_ids) - web_service.prepare_server( - workdir=args.workdir, - port=args.port, - device=args.device, - use_lite=args.use_lite, - use_xpu=args.use_xpu, - ir_optim=args.ir_optim) - web_service.run_rpc_service() - - app_instance = Flask(__name__) - - @app_instance.before_first_request - def init(): - web_service._launch_web_service() - - service_name = "/" + web_service.name + "/prediction" - - @app_instance.route(service_name, methods=["POST"]) - def run(): - return web_service.get_prediction(request) - - app_instance.run(host="0.0.0.0", - port=web_service.port, - threaded=False, - processes=4) diff 
--git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py deleted file mode 100644 index b774c2237..000000000 --- a/python/paddle_serving_server_gpu/version.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Paddle Serving Client version string """ -serving_client_version = "0.0.0" -serving_server_version = "0.0.0" -module_proto_version = "0.0.0" -cuda_version = "9" -commit_id = "" diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py deleted file mode 100644 index 67b789266..000000000 --- a/python/paddle_serving_server_gpu/web_service.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#!flask/bin/python -# pylint: disable=doc-string-missing - -from flask import Flask, request, abort -from contextlib import closing -from multiprocessing import Pool, Process, Queue -from paddle_serving_client import Client -from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server -from paddle_serving_server_gpu.serve import start_multi_card -import socket -import sys -import numpy as np -import paddle_serving_server_gpu as serving - -from paddle_serving_server_gpu import pipeline -from paddle_serving_server_gpu.pipeline import Op - - -def port_is_available(port): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex(('0.0.0.0', port)) - if result != 0: - return True - else: - return False - - -class WebService(object): - def __init__(self, name="default_service"): - self.name = name - # pipeline - self._server = pipeline.PipelineServer(self.name) - - self.gpus = [] # deprecated - self.rpc_service_list = [] # deprecated - - def get_pipeline_response(self, read_op): - return None - - def prepare_pipeline_config(self, yaml_file): - # build dag - read_op = pipeline.RequestOp() - last_op = self.get_pipeline_response(read_op) - if not isinstance(last_op, Op): - raise ValueError("The return value type of `get_pipeline_response` " - "function is not Op type, please check function " - "`get_pipeline_response`.") - response_op = pipeline.ResponseOp(input_ops=[last_op]) - self._server.set_response_op(response_op) - self._server.prepare_server(yaml_file) - - def run_service(self): - self._server.run_server() - - def load_model_config(self, model_config): - print("This API will be deprecated later. Please do not use it") - self.model_config = model_config - import os - from .proto import general_model_config_pb2 as m_config - import google.protobuf.text_format - if os.path.isdir(model_config): - client_config = "{}/serving_server_conf.prototxt".format( - model_config) - elif os.path.isfile(model_config): - client_config = model_config - model_conf = m_config.GeneralModelConfig() - f = open(client_config, 'r') - model_conf = google.protobuf.text_format.Merge( - str(f.read()), model_conf) - self.feed_vars = {var.name: var for var in model_conf.feed_var} - self.fetch_vars = {var.name: var for var in model_conf.fetch_var} - - def set_gpus(self, gpus): - print("This API will be deprecated later. 
Please do not use it") - self.gpus = [int(x) for x in gpus.split(",")] - - def default_rpc_service(self, - workdir="conf", - port=9292, - gpuid=0, - thread_num=2, - mem_optim=True, - use_lite=False, - use_xpu=False, - ir_optim=False): - device = "gpu" - if gpuid == -1: - if use_lite: - device = "arm" - else: - device = "cpu" - op_maker = serving.OpMaker() - read_op = op_maker.create('general_reader') - general_infer_op = op_maker.create('general_infer') - general_response_op = op_maker.create('general_response') - - op_seq_maker = OpSeqMaker() - op_seq_maker.add_op(read_op) - op_seq_maker.add_op(general_infer_op) - op_seq_maker.add_op(general_response_op) - - server = Server() - server.set_op_sequence(op_seq_maker.get_op_sequence()) - server.set_num_threads(thread_num) - server.set_memory_optimize(mem_optim) - server.set_ir_optimize(ir_optim) - server.set_device(device) - - if use_lite: - server.set_lite() - if use_xpu: - server.set_xpu() - - server.load_model_config(self.model_config) - if gpuid >= 0: - server.set_gpuid(gpuid) - server.prepare_server(workdir=workdir, port=port, device=device) - return server - - def _launch_rpc_service(self, service_idx): - self.rpc_service_list[service_idx].run_server() - - def port_is_available(self, port): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.settimeout(2) - result = sock.connect_ex(('0.0.0.0', port)) - if result != 0: - return True - else: - return False - - def prepare_server(self, - workdir="", - port=9393, - device="gpu", - use_lite=False, - use_xpu=False, - ir_optim=False, - gpuid=0, - mem_optim=True): - print("This API will be deprecated later. Please do not use it") - self.workdir = workdir - self.port = port - self.device = device - self.gpuid = gpuid - self.port_list = [] - default_port = 12000 - for i in range(1000): - if port_is_available(default_port + i): - self.port_list.append(default_port + i) - if len(self.port_list) > len(self.gpus): - break - - if len(self.gpus) == 0: - # init cpu service - self.rpc_service_list.append( - self.default_rpc_service( - self.workdir, - self.port_list[0], - -1, - thread_num=2, - mem_optim=mem_optim, - use_lite=use_lite, - use_xpu=use_xpu, - ir_optim=ir_optim)) - else: - for i, gpuid in enumerate(self.gpus): - self.rpc_service_list.append( - self.default_rpc_service( - "{}_{}".format(self.workdir, i), - self.port_list[i], - gpuid, - thread_num=2, - mem_optim=mem_optim, - use_lite=use_lite, - use_xpu=use_xpu, - ir_optim=ir_optim)) - - def _launch_web_service(self): - gpu_num = len(self.gpus) - self.client = Client() - self.client.load_client_config("{}/serving_server_conf.prototxt".format( - self.model_config)) - endpoints = "" - if gpu_num > 0: - for i in range(gpu_num): - endpoints += "127.0.0.1:{},".format(self.port_list[i]) - else: - endpoints = "127.0.0.1:{}".format(self.port_list[0]) - self.client.connect([endpoints]) - - def get_prediction(self, request): - if not request.json: - abort(400) - if "fetch" not in request.json: - abort(400) - try: - feed, fetch, is_batch = self.preprocess(request.json["feed"], - request.json["fetch"]) - if isinstance(feed, dict) and "fetch" in feed: - del feed["fetch"] - if len(feed) == 0: - raise ValueError("empty input") - fetch_map = self.client.predict( - feed=feed, fetch=fetch, batch=is_batch) - result = self.postprocess( - feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map) - result = {"result": result} - except ValueError as err: - result = {"result": str(err)} - return result - - def run_rpc_service(self): - 
print("This API will be deprecated later. Please do not use it") - import socket - localIP = socket.gethostbyname(socket.gethostname()) - print("web service address:") - print("http://{}:{}/{}/prediction".format(localIP, self.port, - self.name)) - server_pros = [] - for i, service in enumerate(self.rpc_service_list): - p = Process(target=self._launch_rpc_service, args=(i, )) - server_pros.append(p) - for p in server_pros: - p.start() - - app_instance = Flask(__name__) - - @app_instance.before_first_request - def init(): - self._launch_web_service() - - service_name = "/" + self.name + "/prediction" - - @app_instance.route(service_name, methods=["POST"]) - def run(): - return self.get_prediction(request) - - self.app_instance = app_instance - - # TODO: maybe change another API name: maybe run_local_predictor? - def run_debugger_service(self, gpu=False): - print("This API will be deprecated later. Please do not use it") - import socket - localIP = socket.gethostbyname(socket.gethostname()) - print("web service address:") - print("http://{}:{}/{}/prediction".format(localIP, self.port, - self.name)) - app_instance = Flask(__name__) - - @app_instance.before_first_request - def init(): - self._launch_local_predictor(gpu) - - service_name = "/" + self.name + "/prediction" - - @app_instance.route(service_name, methods=["POST"]) - def run(): - return self.get_prediction(request) - - self.app_instance = app_instance - - def _launch_local_predictor(self, gpu): - from paddle_serving_app.local_predict import LocalPredictor - self.client = LocalPredictor() - self.client.load_model_config( - "{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0]) - - def run_web_service(self): - print("This API will be deprecated later. Please do not use it") - self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True) - - def get_app_instance(self): - return self.app_instance - - def preprocess(self, feed=[], fetch=[]): - print("This API will be deprecated later. Please do not use it") - is_batch = True - feed_dict = {} - for var_name in self.feed_vars.keys(): - feed_dict[var_name] = [] - for feed_ins in feed: - for key in feed_ins: - feed_dict[key].append( - np.array(feed_ins[key]).reshape( - list(self.feed_vars[key].shape))[np.newaxis, :]) - feed = {} - for key in feed_dict: - feed[key] = np.concatenate(feed_dict[key], axis=0) - return feed, fetch, is_batch - - def postprocess(self, feed=[], fetch=[], fetch_map=None): - print("This API will be deprecated later. 
Please do not use it") - for key in fetch_map: - fetch_map[key] = fetch_map[key].tolist() - return fetch_map From 1adab94bd774d05108cfc456e24e11fe19accdde Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 09:53:08 +0000 Subject: [PATCH 02/12] update python code --- python/CMakeLists.txt | 6 ++--- python/gen_version.py | 2 +- python/paddle_serving_server/__init__.py | 19 +++++++++++--- python/paddle_serving_server/serve.py | 2 +- python/paddle_serving_server/server.py | 28 +++++++++++---------- python/paddle_serving_server/version.py | 19 ++++++++++++++ python/paddle_serving_server/web_service.py | 10 ++++---- 7 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 python/paddle_serving_server/version.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 924c79794..abb75a96d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -63,11 +63,11 @@ endif() if (SERVER) if(CUDA_VERSION EQUAL 10.1) - set(SUFFIX 101) + set(VERSION_SUFFIX 101) elseif(CUDA_VERSION EQUAL 10.2) - set(SUFFIX 102) + set(VERSION_SUFFIX 102) elseif(CUDA_VERSION EQUAL 11.0) - set(SUFFIX 11) + set(VERSION_SUFFIX 11) endif() add_custom_command( diff --git a/python/gen_version.py b/python/gen_version.py index a13c52774..ed812a924 100644 --- a/python/gen_version.py +++ b/python/gen_version.py @@ -35,7 +35,7 @@ def update_info(file_name, feature, info): if len(sys.argv) > 2: - update_info("paddle_serving_server/version.py", "cuda_version", + update_info("paddle_serving_server/version.py", "version_suffix", sys.argv[2]) path = "paddle_serving_" + sys.argv[1] diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 102695c88..e7633b23b 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -13,8 +13,19 @@ # limitations under the License. # pylint: disable=doc-string-missing -SERVER_VERSION = "0.0.0" +from . import dag +from . import monitor +from . import rpc_service +from . import serve +from . import web_service +from . 
import version + +from dag import * +from monitor import * +from rpc_service import * +from serve import * +from web_service import * +from version import * -__version__ = SERVER_VERSION -cuda_version = "9" -commit_id = "" \ No newline at end of file +SERVER_VERSION = "0.0.0" +__version__ = SERVER_VERSION \ No newline at end of file diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index cdb10aa3d..8e6e8a093 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -23,7 +23,7 @@ import base64 import time from multiprocessing import Pool, Process -from paddle_serving_server_gpu import serve_args +from paddle_serving_server import serve_args from flask import Flask, request import sys if sys.version_info.major == 2: diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index 38fe5117c..ec0c545b5 100644 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -2,7 +2,7 @@ import os import tarfile import socket -import paddle_serving_server_gpu as paddle_serving_server +import paddle_serving_server as paddle_serving_server import time from .version import serving_server_version from contextlib import closing @@ -157,18 +157,19 @@ def _prepare_engine(self, model_config_paths, device, use_encryption_model): if device == "arm": engine.use_lite = self.use_lite engine.use_xpu = self.use_xpu - if device == "cpu": - if use_encryption_model: - engine.type = "FLUID_CPU_ANALYSIS_ENCRPT" - else: - engine.type = "FLUID_CPU_ANALYSIS" + suffix - elif device == "gpu": - if use_encryption_model: - engine.type = "FLUID_GPU_ANALYSIS_ENCRPT" - else: - engine.type = "FLUID_GPU_ANALYSIS" + suffix - elif device == "arm": - engine.type = "FLUID_ARM_ANALYSIS" + suffix + engine.type = "PaddleInferenceEngine" + # if device == "cpu": + # if use_encryption_model: + # engine.type = "FLUID_CPU_ANALYSIS_ENCRPT" + # else: + # engine.type = "FLUID_CPU_ANALYSIS" + suffix + # elif device == "gpu": + # if use_encryption_model: + # engine.type = "FLUID_GPU_ANALYSIS_ENCRPT" + # else: + # engine.type = "FLUID_GPU_ANALYSIS" + suffix + # elif device == "arm": + # engine.type = "FLUID_ARM_ANALYSIS" + suffix self.model_toolkit_conf.engines.extend([engine]) def _prepare_infer_service(self, port): @@ -290,6 +291,7 @@ def download_bin(self): version_file = open("{}/version.py".format(self.module_path), "r") import re for line in version_file.readlines(): + # to add, version_suffix if re.match("cuda_version", line): cuda_version = line.split("\"")[1] if cuda_version == "101" or cuda_version == "102": diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py new file mode 100644 index 000000000..b774c2237 --- /dev/null +++ b/python/paddle_serving_server/version.py @@ -0,0 +1,19 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Paddle Serving Client version string """ +serving_client_version = "0.0.0" +serving_server_version = "0.0.0" +module_proto_version = "0.0.0" +cuda_version = "9" +commit_id = "" diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index 2332b9ba2..2b1d2b3f3 100644 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -18,15 +18,15 @@ from contextlib import closing from multiprocessing import Pool, Process, Queue from paddle_serving_client import Client -from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server -from paddle_serving_server_gpu.serve import start_multi_card +from paddle_serving_server import OpMaker, OpSeqMaker, Server +from paddle_serving_server.serve import start_multi_card import socket import sys import numpy as np -import paddle_serving_server_gpu as serving +import paddle_serving_server as serving -from paddle_serving_server_gpu import pipeline -from paddle_serving_server_gpu.pipeline import Op +from paddle_serving_server import pipeline +from paddle_serving_server.pipeline import Op def port_is_available(port): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: From e6b2f87753f32bc60860256a9feaf429679a64d5 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 09:53:17 +0000 Subject: [PATCH 03/12] update python CMakeLists.txt --- python/CMakeLists.txt | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index abb75a96d..d12af3ad0 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -62,12 +62,19 @@ if (CLIENT) endif() if (SERVER) - if(CUDA_VERSION EQUAL 10.1) - set(VERSION_SUFFIX 101) - elseif(CUDA_VERSION EQUAL 10.2) - set(VERSION_SUFFIX 102) - elseif(CUDA_VERSION EQUAL 11.0) - set(VERSION_SUFFIX 11) + # todo, generate suffix for cpu、gpu、arm + if(WITH_TRT) + if(CUDA_VERSION EQUAL 10.1) + set(VERSION_SUFFIX 101) + elseif(CUDA_VERSION EQUAL 10.2) + set(VERSION_SUFFIX 102) + elseif(CUDA_VERSION EQUAL 11.0) + set(VERSION_SUFFIX 11) + endif() + endif() + + if(WITH_LITE) + set(VERSION_SUFFIX xpu) endif() add_custom_command( From 98ab4b0d02bbe56a2a1c2c85c4397d35b5b49706 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 08:33:21 +0000 Subject: [PATCH 04/12] proto gen cmake fix --- core/configure/CMakeLists.txt | 200 +++++++++++++++------------------- 1 file changed, 87 insertions(+), 113 deletions(-) diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt index 8e2b62eb6..32534fee1 100644 --- a/core/configure/CMakeLists.txt +++ b/core/configure/CMakeLists.txt @@ -1,120 +1,94 @@ if (SERVER OR CLIENT) -LIST(APPEND protofiles - ${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto - ${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto - ${CMAKE_CURRENT_LIST_DIR}/proto/inferencer_configure.proto - ${CMAKE_CURRENT_LIST_DIR}/proto/general_model_config.proto -) - -PROTOBUF_GENERATE_CPP(configure_proto_srcs configure_proto_hdrs ${protofiles}) -list(APPEND configure_srcs ${configure_proto_srcs}) - -list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp) - -add_library(configure ${configure_srcs}) -add_dependencies(configure brpc) - -install(TARGETS configure - ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib - ) - -install(FILES ${CMAKE_CURRENT_LIST_DIR}/include/configure_parser.h - DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure/include) - -FILE(GLOB inc 
${CMAKE_CURRENT_BINARY_DIR}/*.pb.h) - -install(FILES ${inc} - DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure) + LIST(APPEND protofiles + ${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto + ${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto + ${CMAKE_CURRENT_LIST_DIR}/proto/inferencer_configure.proto + ${CMAKE_CURRENT_LIST_DIR}/proto/general_model_config.proto + ) + + PROTOBUF_GENERATE_CPP(configure_proto_srcs configure_proto_hdrs ${protofiles}) + list(APPEND configure_srcs ${configure_proto_srcs}) + + list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp) + + add_library(configure ${configure_srcs}) + add_dependencies(configure brpc) + + install(TARGETS configure + ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib + ) + + install(FILES ${CMAKE_CURRENT_LIST_DIR}/include/configure_parser.h + DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure/include) + + FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h) + + install(FILES ${inc} + DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure) endif() if (WITH_PYTHON) -py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto) -add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(general_model_config_py_proto general_model_config_py_proto_init) - -py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto) -add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init) - -if (CLIENT) -py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto) -add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init) -add_custom_command(TARGET sdk_configure_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMENT "Copy generated python proto into directory paddle_serving_client/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - -add_custom_command(TARGET general_model_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - -add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto - COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto." 
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif() - -if (APP) -add_custom_command(TARGET general_model_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto - COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif() - -if (SERVER) -py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto) -add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) -add_dependencies(server_config_py_proto server_config_py_proto_init) -if (NOT WITH_GPU AND NOT WITH_LITE) -add_custom_command(TARGET server_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMENT "Copy generated python proto into directory paddle_serving_server/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR}) - -add_custom_command(TARGET general_model_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - -add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto - COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -else() -add_custom_command(TARGET server_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMAND cp -f *.py - ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMENT "Copy generated python proto into directory - paddle_serving_server_gpu/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR}) - -add_custom_command(TARGET general_model_config_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory - ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMAND cp -f *.py - ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMENT "Copy generated general_model_config proto file into directory - paddle_serving_server_gpu/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - -add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto - COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server_gpu/proto." 
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif() -endif() + py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto) + add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(general_model_config_py_proto general_model_config_py_proto_init) + + py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto) + add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init) + + if (CLIENT) + py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto) + add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init) + add_custom_command(TARGET sdk_configure_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMENT "Copy generated python proto into directory paddle_serving_client/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + add_custom_command(TARGET general_model_config_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto + COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + if (APP) + add_custom_command(TARGET general_model_config_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto + COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + if (SERVER) + py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto) + add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_dependencies(server_config_py_proto server_config_py_proto_init) + add_custom_command(TARGET server_config_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMENT "Copy generated python proto into directory paddle_serving_server/proto." 
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR}) + + add_custom_command(TARGET general_model_config_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto + COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() endif() From e1c47de433e371ca7c3ec302c8a321f962cc056f Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 09:35:31 +0000 Subject: [PATCH 05/12] rename paddle_serving_server_gpu --- python/examples/bert/README.md | 2 +- python/examples/bert/README_CN.md | 2 +- python/examples/bert/benchmark.sh | 2 +- .../examples/bert/benchmark_with_profile.sh | 2 +- python/examples/bert/bert_gpu_server.py | 6 +- python/examples/bert/bert_web_service_gpu.py | 2 +- python/examples/cascade_rcnn/README.md | 2 +- python/examples/cascade_rcnn/README_CN.md | 2 +- python/examples/criteo_ctr/README.md | 2 +- python/examples/criteo_ctr/README_CN.md | 2 +- python/examples/deeplabv3/README.md | 2 +- python/examples/deeplabv3/README_CN.md | 2 +- .../faster_rcnn_r50_fpn_1x_coco/README.md | 2 +- .../faster_rcnn_r50_fpn_1x_coco/README_CN.md | 2 +- .../ppyolo_r50vd_dcn_1x_coco/README.md | 2 +- .../ppyolo_r50vd_dcn_1x_coco/README_CN.md | 2 +- .../ttfnet_darknet53_1x_coco/README.md | 2 +- .../ttfnet_darknet53_1x_coco/README_CN.md | 2 +- .../yolov3_darknet53_270e_coco/README.md | 2 +- .../yolov3_darknet53_270e_coco/README_CN.md | 2 +- python/examples/encryption/README.md | 2 +- python/examples/encryption/README_CN.md | 2 +- .../fit_a_line/test_server_gpu.py | 6 +- .../grpc_impl_example/yolov4/README.md | 2 +- .../grpc_impl_example/yolov4/README_CN.md | 2 +- python/examples/imagenet/README.md | 2 +- python/examples/imagenet/README_CN.md | 2 +- python/examples/imagenet/benchmark.sh | 2 +- .../examples/imagenet/resnet50_web_service.py | 2 +- python/examples/mobilenet/README.md | 2 +- python/examples/mobilenet/README_CN.md | 2 +- python/examples/ocr/README.md | 2 +- python/examples/ocr/README_CN.md | 2 +- python/examples/ocr/det_debugger_server.py | 2 +- python/examples/ocr/det_web_server.py | 2 +- python/examples/ocr/ocr_debugger_server.py | 2 +- python/examples/ocr/ocr_web_server.py | 2 +- python/examples/ocr/rec_debugger_server.py | 2 +- python/examples/ocr/rec_web_server.py | 2 +- .../pipeline/imagenet/pipeline_rpc_client.py | 2 +- .../pipeline/imagenet/resnet50_web_service.py | 2 +- .../test_pipeline_server.py | 2 +- .../pipeline/ocr/pipeline_rpc_client.py | 2 +- python/examples/pipeline/ocr/web_service.py | 2 +- .../simple_web_service/web_service.py | 2 +- python/examples/resnet_v2_50/README.md | 2 +- python/examples/resnet_v2_50/README_CN.md | 2 +- python/examples/unet_for_image_seg/README.md | 2 +- .../examples/unet_for_image_seg/README_CN.md | 2 +- python/examples/xpu/fit_a_line_xpu/README.md | 2 +- .../xpu/fit_a_line_xpu/test_server.py | 2 +- .../examples/xpu/resnet_v2_50_xpu/README.md | 2 +- 
.../xpu/resnet_v2_50_xpu/README_CN.md | 2 +- python/examples/yolov4/README.md | 2 +- python/examples/yolov4/README_CN.md | 2 +- python/paddle_serving_client/__init__.py | 722 +----------------- python/paddle_serving_client/client.py | 715 +++++++++++++++++ python/paddle_serving_server/serve.py | 56 +- python/pipeline/local_service_handler.py | 6 +- python/setup.py.server_gpu.in | 42 +- 60 files changed, 873 insertions(+), 786 deletions(-) create mode 100644 python/paddle_serving_client/client.py diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md index 90e96d53a..1313ad415 100644 --- a/python/examples/bert/README.md +++ b/python/examples/bert/README.md @@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #c ``` Or,start gpu inference service,Run ``` -python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 +python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 ``` ### RPC Inference diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md index ef780b1cc..4fa42c78a 100644 --- a/python/examples/bert/README_CN.md +++ b/python/examples/bert/README_CN.md @@ -45,7 +45,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 # ``` 或者,启动gpu预测服务,执行 ``` -python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 +python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 ``` diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh index 09e9e1bc2..525e955e9 100644 --- a/python/examples/bert/benchmark.sh +++ b/python/examples/bert/benchmark.sh @@ -12,7 +12,7 @@ else mkdir utilization fi #start server -$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 & +$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 & sleep 5 #warm up diff --git a/python/examples/bert/benchmark_with_profile.sh b/python/examples/bert/benchmark_with_profile.sh index 8102e30d5..074a9acd2 100644 --- a/python/examples/bert/benchmark_with_profile.sh +++ b/python/examples/bert/benchmark_with_profile.sh @@ -1,5 +1,5 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3 -python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & +python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & export FLAGS_profile_client=1 export FLAGS_profile_server=1 sleep 5 diff --git a/python/examples/bert/bert_gpu_server.py b/python/examples/bert/bert_gpu_server.py index 3fd64c345..7708a0786 100644 --- a/python/examples/bert/bert_gpu_server.py +++ b/python/examples/bert/bert_gpu_server.py @@ -14,9 +14,9 @@ import os import sys -from paddle_serving_server_gpu import OpMaker -from paddle_serving_server_gpu import OpSeqMaker -from paddle_serving_server_gpu import Server +from paddle_serving_server import OpMaker +from paddle_serving_server import OpSeqMaker +from paddle_serving_server import Server op_maker = OpMaker() read_op = op_maker.create('general_reader') diff --git a/python/examples/bert/bert_web_service_gpu.py b/python/examples/bert/bert_web_service_gpu.py index 
cbdd321c0..fb332bca3 100644 --- a/python/examples/bert/bert_web_service_gpu.py +++ b/python/examples/bert/bert_web_service_gpu.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=doc-string-missing -from paddle_serving_server_gpu.web_service import WebService +from paddle_serving_server.web_service import WebService from paddle_serving_app.reader import ChineseBertReader import sys import os diff --git a/python/examples/cascade_rcnn/README.md b/python/examples/cascade_rcnn/README.md index 87617a842..f8aa79e8b 100644 --- a/python/examples/cascade_rcnn/README.md +++ b/python/examples/cascade_rcnn/README.md @@ -10,7 +10,7 @@ If you want to have more detection models, please refer to [Paddle Detection Mod ### Start the service ``` -python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0 +python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 ``` ### Perform prediction diff --git a/python/examples/cascade_rcnn/README_CN.md b/python/examples/cascade_rcnn/README_CN.md index a37cb4733..99606de41 100644 --- a/python/examples/cascade_rcnn/README_CN.md +++ b/python/examples/cascade_rcnn/README_CN.md @@ -10,7 +10,7 @@ sh get_data.sh ### 启动服务 ``` -python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0 +python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 ``` ### 执行预测 diff --git a/python/examples/criteo_ctr/README.md b/python/examples/criteo_ctr/README.md index 2e9c5c537..46be4d0ae 100644 --- a/python/examples/criteo_ctr/README.md +++ b/python/examples/criteo_ctr/README.md @@ -20,7 +20,7 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear. ``` python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service -python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 +python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 ``` ### RPC Infer diff --git a/python/examples/criteo_ctr/README_CN.md b/python/examples/criteo_ctr/README_CN.md index 0fd8fd5ec..c7d6255e0 100644 --- a/python/examples/criteo_ctr/README_CN.md +++ b/python/examples/criteo_ctr/README_CN.md @@ -20,7 +20,7 @@ mv models/ctr_serving_model . 
``` python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务 -python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 +python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 ``` ### 执行预测 diff --git a/python/examples/deeplabv3/README.md b/python/examples/deeplabv3/README.md index 3eb5c84e2..28bec77bb 100644 --- a/python/examples/deeplabv3/README.md +++ b/python/examples/deeplabv3/README.md @@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz ### Start Service ``` -python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494 +python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 ``` ### Client Prediction diff --git a/python/examples/deeplabv3/README_CN.md b/python/examples/deeplabv3/README_CN.md index a25bb2d05..6de3c4208 100644 --- a/python/examples/deeplabv3/README_CN.md +++ b/python/examples/deeplabv3/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494 +python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 ``` ### 客户端预测 diff --git a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md index 96ab08bf8..a755b33cb 100644 --- a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md +++ b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf faster_rcnn_r50_fpn_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. diff --git a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md index 1fcced941..47f0aca10 100644 --- a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md +++ b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md @@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf faster_rcnn_r50_fpn_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 diff --git a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md index 7c31f2046..8c3d5142a 100644 --- a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md +++ b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ppyolo_r50vd_dcn_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. 
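As a minimal sketch of that option (assuming a TensorRT-enabled build of `paddle_serving_server`; the model directory, port, and GPU id are the ones already used in this README), the launch command simply appends the flag:

```
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 --use_trt
```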
diff --git a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md index f403246b6..1aebb8db9 100644 --- a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md +++ b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md @@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf ppyolo_r50vd_dcn_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 diff --git a/python/examples/detection/ttfnet_darknet53_1x_coco/README.md b/python/examples/detection/ttfnet_darknet53_1x_coco/README.md index 6b86e7d79..58c538b7c 100644 --- a/python/examples/detection/ttfnet_darknet53_1x_coco/README.md +++ b/python/examples/detection/ttfnet_darknet53_1x_coco/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ttfnet_darknet53_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. diff --git a/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md b/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md index 976c94a4d..641086cd2 100644 --- a/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md +++ b/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md @@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf ttfnet_darknet53_1x_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 diff --git a/python/examples/detection/yolov3_darknet53_270e_coco/README.md b/python/examples/detection/yolov3_darknet53_270e_coco/README.md index 702379452..6357c3030 100644 --- a/python/examples/detection/yolov3_darknet53_270e_coco/README.md +++ b/python/examples/detection/yolov3_darknet53_270e_coco/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf yolov3_darknet53_270e_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. 
diff --git a/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md b/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md index 0dd69e25c..166d562e7 100644 --- a/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md +++ b/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md @@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf yolov3_darknet53_270e_coco.tar -python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 +python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 diff --git a/python/examples/encryption/README.md b/python/examples/encryption/README.md index fd104a7a8..87664e4ca 100644 --- a/python/examples/encryption/README.md +++ b/python/examples/encryption/README.md @@ -26,7 +26,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ``` GPU Service ``` -python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 +python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 ``` ## Prediction diff --git a/python/examples/encryption/README_CN.md b/python/examples/encryption/README_CN.md index 6a33ef906..ed8fec338 100644 --- a/python/examples/encryption/README_CN.md +++ b/python/examples/encryption/README_CN.md @@ -24,7 +24,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ``` GPU预测服务 ``` -python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 +python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 ``` ## 预测 diff --git a/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py b/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py index 1547ee445..62361d999 100644 --- a/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py +++ b/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py @@ -15,9 +15,9 @@ import os import sys -from paddle_serving_server_gpu import OpMaker -from paddle_serving_server_gpu import OpSeqMaker -from paddle_serving_server_gpu import MultiLangServer as Server +from paddle_serving_server import OpMaker +from paddle_serving_server import OpSeqMaker +from paddle_serving_server import MultiLangServer as Server op_maker = OpMaker() read_op = op_maker.create('general_reader') diff --git a/python/examples/grpc_impl_example/yolov4/README.md b/python/examples/grpc_impl_example/yolov4/README.md index a04215dcf..b468a7f6a 100644 --- a/python/examples/grpc_impl_example/yolov4/README.md +++ b/python/examples/grpc_impl_example/yolov4/README.md @@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ## Start RPC Service ``` -python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang +python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang ``` ## Prediction diff --git a/python/examples/grpc_impl_example/yolov4/README_CN.md b/python/examples/grpc_impl_example/yolov4/README_CN.md index de7a85b59..991d2ee22 100644 --- a/python/examples/grpc_impl_example/yolov4/README_CN.md +++ b/python/examples/grpc_impl_example/yolov4/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ## 启动RPC服务 ``` -python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 
--use_multilang +python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang ``` ## 预测 diff --git a/python/examples/imagenet/README.md b/python/examples/imagenet/README.md index 415818e71..ad8b12b5b 100644 --- a/python/examples/imagenet/README.md +++ b/python/examples/imagenet/README.md @@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu ``` ``` -python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service +python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service ``` client send inference request diff --git a/python/examples/imagenet/README_CN.md b/python/examples/imagenet/README_CN.md index 081cff528..8650d51a6 100644 --- a/python/examples/imagenet/README_CN.md +++ b/python/examples/imagenet/README_CN.md @@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu ``` ``` -python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务 +python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务 ``` client端进行预测 diff --git a/python/examples/imagenet/benchmark.sh b/python/examples/imagenet/benchmark.sh index 620cf2a3d..99bda3c84 100644 --- a/python/examples/imagenet/benchmark.sh +++ b/python/examples/imagenet/benchmark.sh @@ -2,7 +2,7 @@ rm profile_log* export CUDA_VISIBLE_DEVICES=0,1,2,3 export FLAGS_profile_server=1 export FLAGS_profile_client=1 -python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog & +python -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog & sleep 5 gpu_id=0 diff --git a/python/examples/imagenet/resnet50_web_service.py b/python/examples/imagenet/resnet50_web_service.py index 703310371..ca111615d 100644 --- a/python/examples/imagenet/resnet50_web_service.py +++ b/python/examples/imagenet/resnet50_web_service.py @@ -25,7 +25,7 @@ if device == "cpu": from paddle_serving_server.web_service import WebService else: - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService class ImageService(WebService): diff --git a/python/examples/mobilenet/README.md b/python/examples/mobilenet/README.md index 496ebdbe2..4a808026a 100644 --- a/python/examples/mobilenet/README.md +++ b/python/examples/mobilenet/README.md @@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ### Start Service ``` -python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 +python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 ``` ### Client Prediction diff --git a/python/examples/mobilenet/README_CN.md b/python/examples/mobilenet/README_CN.md index 7c721b4bd..d4f91837e 100644 --- a/python/examples/mobilenet/README_CN.md +++ b/python/examples/mobilenet/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 +python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 ``` ### 客户端预测 diff --git a/python/examples/ocr/README.md b/python/examples/ocr/README.md index 680376a07..aefcafac5 100644 --- a/python/examples/ocr/README.md +++ 
b/python/examples/ocr/README.md @@ -26,7 +26,7 @@ tar xf test_imgs.tar python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python ocr_web_server.py cpu #for gpu user -python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 +python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0 python ocr_web_server.py gpu ``` diff --git a/python/examples/ocr/README_CN.md b/python/examples/ocr/README_CN.md index 52663bfd3..997cf872e 100644 --- a/python/examples/ocr/README_CN.md +++ b/python/examples/ocr/README_CN.md @@ -25,7 +25,7 @@ tar xf test_imgs.tar python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python ocr_web_server.py cpu #for gpu user -python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 +python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0 python ocr_web_server.py gpu ``` diff --git a/python/examples/ocr/det_debugger_server.py b/python/examples/ocr/det_debugger_server.py index ebaf0a306..8c8305012 100644 --- a/python/examples/ocr/det_debugger_server.py +++ b/python/examples/ocr/det_debugger_server.py @@ -22,7 +22,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService import time diff --git a/python/examples/ocr/det_web_server.py b/python/examples/ocr/det_web_server.py index e90efc781..c72dc6af9 100644 --- a/python/examples/ocr/det_web_server.py +++ b/python/examples/ocr/det_web_server.py @@ -22,7 +22,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService import time diff --git a/python/examples/ocr/ocr_debugger_server.py b/python/examples/ocr/ocr_debugger_server.py index 163ff9788..85bb4f0c4 100644 --- a/python/examples/ocr/ocr_debugger_server.py +++ b/python/examples/ocr/ocr_debugger_server.py @@ -23,7 +23,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService from paddle_serving_app.local_predict import LocalPredictor diff --git a/python/examples/ocr/ocr_web_server.py b/python/examples/ocr/ocr_web_server.py index 97e619f2b..56cacc0e3 100644 --- a/python/examples/ocr/ocr_web_server.py +++ b/python/examples/ocr/ocr_web_server.py @@ -23,7 +23,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService import time diff --git 
a/python/examples/ocr/rec_debugger_server.py b/python/examples/ocr/rec_debugger_server.py index f2b3d0705..5775feb71 100644 --- a/python/examples/ocr/rec_debugger_server.py +++ b/python/examples/ocr/rec_debugger_server.py @@ -23,7 +23,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService import time diff --git a/python/examples/ocr/rec_web_server.py b/python/examples/ocr/rec_web_server.py index a3de120af..61669fddf 100644 --- a/python/examples/ocr/rec_web_server.py +++ b/python/examples/ocr/rec_web_server.py @@ -23,7 +23,7 @@ from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes if sys.argv[1] == 'gpu': - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService elif sys.argv[1] == 'cpu': from paddle_serving_server.web_service import WebService import time diff --git a/python/examples/pipeline/imagenet/pipeline_rpc_client.py b/python/examples/pipeline/imagenet/pipeline_rpc_client.py index 77157359e..573ef4c8f 100644 --- a/python/examples/pipeline/imagenet/pipeline_rpc_client.py +++ b/python/examples/pipeline/imagenet/pipeline_rpc_client.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. try: - from paddle_serving_server_gpu.pipeline import PipelineClient + from paddle_serving_server.pipeline import PipelineClient except ImportError: from paddle_serving_server.pipeline import PipelineClient import numpy as np diff --git a/python/examples/pipeline/imagenet/resnet50_web_service.py b/python/examples/pipeline/imagenet/resnet50_web_service.py index ece3befee..d6bb793a8 100644 --- a/python/examples/pipeline/imagenet/resnet50_web_service.py +++ b/python/examples/pipeline/imagenet/resnet50_web_service.py @@ -14,7 +14,7 @@ import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage try: - from paddle_serving_server_gpu.web_service import WebService, Op + from paddle_serving_server.web_service import WebService, Op except ImportError: from paddle_serving_server.web_service import WebService, Op import logging diff --git a/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py index 35171a391..8ff408550 100644 --- a/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py +++ b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py @@ -22,7 +22,7 @@ try: from paddle_serving_server.web_service import WebService except ImportError: - from paddle_serving_server_gpu.web_service import WebService + from paddle_serving_server.web_service import WebService _LOGGER = logging.getLogger() user_handler = logging.StreamHandler() diff --git a/python/examples/pipeline/ocr/pipeline_rpc_client.py b/python/examples/pipeline/ocr/pipeline_rpc_client.py index ec721ec35..66faa0428 100644 --- a/python/examples/pipeline/ocr/pipeline_rpc_client.py +++ b/python/examples/pipeline/ocr/pipeline_rpc_client.py @@ -12,7 +12,7 @@ # See the License for the specific 
language governing permissions and # limitations under the License. try: - from paddle_serving_server_gpu.pipeline import PipelineClient + from paddle_serving_server.pipeline import PipelineClient except ImportError: from paddle_serving_server.pipeline import PipelineClient import numpy as np diff --git a/python/examples/pipeline/ocr/web_service.py b/python/examples/pipeline/ocr/web_service.py index 7e9dd3141..60a39a4c0 100644 --- a/python/examples/pipeline/ocr/web_service.py +++ b/python/examples/pipeline/ocr/web_service.py @@ -14,7 +14,7 @@ try: from paddle_serving_server.web_service import WebService, Op except ImportError: - from paddle_serving_server_gpu.web_service import WebService, Op + from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import cv2 diff --git a/python/examples/pipeline/simple_web_service/web_service.py b/python/examples/pipeline/simple_web_service/web_service.py index 4a90286fb..f85d64ae7 100644 --- a/python/examples/pipeline/simple_web_service/web_service.py +++ b/python/examples/pipeline/simple_web_service/web_service.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. try: - from paddle_serving_server_gpu.web_service import WebService, Op + from paddle_serving_server.web_service import WebService, Op except ImportError: from paddle_serving_server.web_service import WebService, Op import logging diff --git a/python/examples/resnet_v2_50/README.md b/python/examples/resnet_v2_50/README.md index fd86074c7..0279918b6 100644 --- a/python/examples/resnet_v2_50/README.md +++ b/python/examples/resnet_v2_50/README.md @@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ### Start Service ``` -python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 +python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 ``` ### Client Prediction diff --git a/python/examples/resnet_v2_50/README_CN.md b/python/examples/resnet_v2_50/README_CN.md index bda2916eb..c67e4f7c3 100644 --- a/python/examples/resnet_v2_50/README_CN.md +++ b/python/examples/resnet_v2_50/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 +python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 ``` ### 客户端预测 diff --git a/python/examples/unet_for_image_seg/README.md b/python/examples/unet_for_image_seg/README.md index 7936ad43c..170dc133a 100644 --- a/python/examples/unet_for_image_seg/README.md +++ b/python/examples/unet_for_image_seg/README.md @@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz ### Start Service ``` -python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494 +python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494 ``` ### Client Prediction diff --git a/python/examples/unet_for_image_seg/README_CN.md b/python/examples/unet_for_image_seg/README_CN.md index f4b91aaff..eed1313eb 100644 --- a/python/examples/unet_for_image_seg/README_CN.md +++ b/python/examples/unet_for_image_seg/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494 +python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494 ``` ### 客户端预测 diff --git a/python/examples/xpu/fit_a_line_xpu/README.md 
b/python/examples/xpu/fit_a_line_xpu/README.md index 15c5eecbc..e962bcf34 100644 --- a/python/examples/xpu/fit_a_line_xpu/README.md +++ b/python/examples/xpu/fit_a_line_xpu/README.md @@ -15,7 +15,7 @@ sh get_data.sh ### Start server ```shell -python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim +python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim ``` ### Client prediction diff --git a/python/examples/xpu/fit_a_line_xpu/test_server.py b/python/examples/xpu/fit_a_line_xpu/test_server.py index 43a690f2f..a53e89f7e 100644 --- a/python/examples/xpu/fit_a_line_xpu/test_server.py +++ b/python/examples/xpu/fit_a_line_xpu/test_server.py @@ -13,7 +13,7 @@ # limitations under the License. # pylint: disable=doc-string-missing -from paddle_serving_server_gpu.web_service import WebService +from paddle_serving_server.web_service import WebService import numpy as np diff --git a/python/examples/xpu/resnet_v2_50_xpu/README.md b/python/examples/xpu/resnet_v2_50_xpu/README.md index 28a2d6017..fc819dd4b 100644 --- a/python/examples/xpu/resnet_v2_50_xpu/README.md +++ b/python/examples/xpu/resnet_v2_50_xpu/README.md @@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ### Start Service ``` -python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim +python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim ``` ### Client Prediction diff --git a/python/examples/xpu/resnet_v2_50_xpu/README_CN.md b/python/examples/xpu/resnet_v2_50_xpu/README_CN.md index d08eb0e46..7f44817ae 100644 --- a/python/examples/xpu/resnet_v2_50_xpu/README_CN.md +++ b/python/examples/xpu/resnet_v2_50_xpu/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim +python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim ``` ### 客户端预测 diff --git a/python/examples/yolov4/README.md b/python/examples/yolov4/README.md index 08e16026d..fb1bc7622 100644 --- a/python/examples/yolov4/README.md +++ b/python/examples/yolov4/README.md @@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ## Start RPC Service ``` -python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 +python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 ``` ## Prediction diff --git a/python/examples/yolov4/README_CN.md b/python/examples/yolov4/README_CN.md index a4eed96b0..72923c5af 100644 --- a/python/examples/yolov4/README_CN.md +++ b/python/examples/yolov4/README_CN.md @@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ## 启动RPC服务 ``` -python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 +python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 ``` ## 预测 diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index a00fd298d..f0a7c03ac 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -13,703 +13,25 @@ # limitations under the License. 
# pylint: disable=doc-string-missing -import paddle_serving_client -import os -from .proto import sdk_configure_pb2 as sdk -from .proto import general_model_config_pb2 as m_config -import google.protobuf.text_format -import numpy as np -import requests -import json -import base64 -import time -import sys - -import grpc -from .proto import multi_lang_general_model_service_pb2 -sys.path.append( - os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) -from .proto import multi_lang_general_model_service_pb2_grpc - -int64_type = 0 -float32_type = 1 -int32_type = 2 -int_type = set([int64_type, int32_type]) -float_type = set([float32_type]) - - -class _NOPProfiler(object): - def record(self, name): - pass - - def print_profile(self): - pass - - -class _TimeProfiler(object): - def __init__(self): - self.pid = os.getpid() - self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid) - self.time_record = [self.print_head] - - def record(self, name): - self.time_record.append('{}:{} '.format( - name, int(round(time.time() * 1000000)))) - - def print_profile(self): - self.time_record.append('\n') - sys.stderr.write(''.join(self.time_record)) - self.time_record = [self.print_head] - - -_is_profile = int(os.environ.get('FLAGS_profile_client', 0)) -_Profiler = _TimeProfiler if _is_profile else _NOPProfiler - - -class SDKConfig(object): - def __init__(self): - self.sdk_desc = sdk.SDKConf() - self.tag_list = [] - self.cluster_list = [] - self.variant_weight_list = [] - self.rpc_timeout_ms = 20000 - self.load_balance_strategy = "la" - - def add_server_variant(self, tag, cluster, variant_weight): - self.tag_list.append(tag) - self.cluster_list.append(cluster) - self.variant_weight_list.append(variant_weight) - - def set_load_banlance_strategy(self, strategy): - self.load_balance_strategy = strategy - - def gen_desc(self, rpc_timeout_ms): - predictor_desc = sdk.Predictor() - predictor_desc.name = "general_model" - predictor_desc.service_name = \ - "baidu.paddle_serving.predictor.general_model.GeneralModelService" - predictor_desc.endpoint_router = "WeightedRandomRender" - predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join( - self.variant_weight_list) - - for idx, tag in enumerate(self.tag_list): - variant_desc = sdk.VariantConf() - variant_desc.tag = tag - variant_desc.naming_conf.cluster = "list://{}".format(",".join( - self.cluster_list[idx])) - predictor_desc.variants.extend([variant_desc]) - - self.sdk_desc.predictors.extend([predictor_desc]) - self.sdk_desc.default_variant_conf.tag = "default" - self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000 - self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms - self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2 - self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100 - self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1 - self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2 - self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled" - - self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default" - self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la" - - self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0 - self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20 - self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std" - 
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3 - - return self.sdk_desc - - -class Client(object): - def __init__(self): - self.feed_names_ = [] - self.fetch_names_ = [] - self.client_handle_ = None - self.feed_shapes_ = {} - self.feed_types_ = {} - self.feed_names_to_idx_ = {} - self.pid = os.getpid() - self.predictor_sdk_ = None - self.producers = [] - self.consumer = None - self.profile_ = _Profiler() - self.all_numpy_input = True - self.has_numpy_input = False - self.rpc_timeout_ms = 20000 - from .serving_client import PredictorRes - self.predictorres_constructor = PredictorRes - - def load_client_config(self, path): - from .serving_client import PredictorClient - model_conf = m_config.GeneralModelConfig() - f = open(path, 'r') - model_conf = google.protobuf.text_format.Merge( - str(f.read()), model_conf) - - # load configuraion here - # get feed vars, fetch vars - # get feed shapes, feed types - # map feed names to index - self.client_handle_ = PredictorClient() - self.client_handle_.init(path) - if "FLAGS_max_body_size" not in os.environ: - os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024) - read_env_flags = ["profile_client", "profile_server", "max_body_size"] - self.client_handle_.init_gflags([sys.argv[ - 0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) - self.feed_names_ = [var.alias_name for var in model_conf.feed_var] - self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] - self.feed_names_to_idx_ = {} - self.fetch_names_to_type_ = {} - self.fetch_names_to_idx_ = {} - self.lod_tensor_set = set() - self.feed_tensor_len = {} - self.key = None - - for i, var in enumerate(model_conf.feed_var): - self.feed_names_to_idx_[var.alias_name] = i - self.feed_types_[var.alias_name] = var.feed_type - self.feed_shapes_[var.alias_name] = var.shape - - if var.is_lod_tensor: - self.lod_tensor_set.add(var.alias_name) - else: - counter = 1 - for dim in self.feed_shapes_[var.alias_name]: - counter *= dim - self.feed_tensor_len[var.alias_name] = counter - for i, var in enumerate(model_conf.fetch_var): - self.fetch_names_to_idx_[var.alias_name] = i - self.fetch_names_to_type_[var.alias_name] = var.fetch_type - if var.is_lod_tensor: - self.lod_tensor_set.add(var.alias_name) - return - - def add_variant(self, tag, cluster, variant_weight): - if self.predictor_sdk_ is None: - self.predictor_sdk_ = SDKConfig() - self.predictor_sdk_.add_server_variant(tag, cluster, - str(variant_weight)) - - def set_rpc_timeout_ms(self, rpc_timeout): - if not isinstance(rpc_timeout, int): - raise ValueError("rpc_timeout must be int type.") - else: - self.rpc_timeout_ms = rpc_timeout - - def use_key(self, key_filename): - with open(key_filename, "rb") as f: - self.key = f.read() - - def get_serving_port(self, endpoints): - if self.key is not None: - req = json.dumps({"key": base64.b64encode(self.key).decode()}) - else: - req = json.dumps({}) - r = requests.post("http://" + endpoints[0], req) - result = r.json() - print(result) - if "endpoint_list" not in result: - raise ValueError("server not ready") - else: - endpoints = [ - endpoints[0].split(":")[0] + ":" + - str(result["endpoint_list"][0]) - ] - return endpoints - - def connect(self, endpoints=None, encryption=False): - # check whether current endpoint is available - # init from client config - # create predictor here - if endpoints is None: - if self.predictor_sdk_ is None: - raise ValueError( - "You must set the endpoints parameter or use add_variant function to create a variant." 
- ) - else: - if encryption: - endpoints = self.get_serving_port(endpoints) - if self.predictor_sdk_ is None: - self.add_variant('default_tag_{}'.format(id(self)), endpoints, - 100) - else: - print( - "parameter endpoints({}) will not take effect, because you use the add_variant function.". - format(endpoints)) - sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms) - self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString( - )) - - def get_feed_names(self): - return self.feed_names_ - - def get_fetch_names(self): - return self.fetch_names_ - - def shape_check(self, feed, key): - if key in self.lod_tensor_set: - return - if isinstance(feed[key], - list) and len(feed[key]) != self.feed_tensor_len[key]: - raise ValueError("The shape of feed tensor {} not match.".format( - key)) - if type(feed[key]).__module__ == np.__name__ and np.size(feed[ - key]) != self.feed_tensor_len[key]: - #raise SystemExit("The shape of feed tensor {} not match.".format( - # key)) - pass - - def predict(self, - feed=None, - fetch=None, - batch=False, - need_variant_tag=False, - log_id=0): - self.profile_.record('py_prepro_0') - - if feed is None or fetch is None: - raise ValueError("You should specify feed and fetch for prediction") - - fetch_list = [] - if isinstance(fetch, str): - fetch_list = [fetch] - elif isinstance(fetch, list): - fetch_list = fetch - else: - raise ValueError("Fetch only accepts string and list of string") - - feed_batch = [] - if isinstance(feed, dict): - feed_batch.append(feed) - elif isinstance(feed, list): - feed_batch = feed - else: - raise ValueError("Feed only accepts dict and list of dict") - - int_slot_batch = [] - float_slot_batch = [] - int_feed_names = [] - float_feed_names = [] - int_shape = [] - int_lod_slot_batch = [] - float_lod_slot_batch = [] - float_shape = [] - - fetch_names = [] - counter = 0 - batch_size = len(feed_batch) - - for key in fetch_list: - if key in self.fetch_names_: - fetch_names.append(key) - - if len(fetch_names) == 0: - raise ValueError( - "Fetch names should not be empty or out of saved fetch list.") - return {} - - for i, feed_i in enumerate(feed_batch): - int_slot = [] - float_slot = [] - int_lod_slot = [] - float_lod_slot = [] - for key in feed_i: - if ".lod" not in key and key not in self.feed_names_: - raise ValueError("Wrong feed name: {}.".format(key)) - if ".lod" in key: - continue - #if not isinstance(feed_i[key], np.ndarray): - self.shape_check(feed_i, key) - if self.feed_types_[key] in int_type: - if i == 0: - int_feed_names.append(key) - shape_lst = [] - if batch == False: - feed_i[key] = feed_i[key][np.newaxis, :] - if isinstance(feed_i[key], np.ndarray): - shape_lst.extend(list(feed_i[key].shape)) - int_shape.append(shape_lst) - else: - int_shape.append(self.feed_shapes_[key]) - if "{}.lod".format(key) in feed_i: - int_lod_slot_batch.append(feed_i["{}.lod".format( - key)]) - else: - int_lod_slot_batch.append([]) - - if isinstance(feed_i[key], np.ndarray): - int_slot.append(feed_i[key]) - self.has_numpy_input = True - else: - int_slot.append(feed_i[key]) - self.all_numpy_input = False - - elif self.feed_types_[key] in float_type: - if i == 0: - float_feed_names.append(key) - shape_lst = [] - if batch == False: - feed_i[key] = feed_i[key][np.newaxis, :] - if isinstance(feed_i[key], np.ndarray): - shape_lst.extend(list(feed_i[key].shape)) - float_shape.append(shape_lst) - else: - float_shape.append(self.feed_shapes_[key]) - if "{}.lod".format(key) in feed_i: - float_lod_slot_batch.append(feed_i["{}.lod".format( - key)]) - 
else: - float_lod_slot_batch.append([]) - - if isinstance(feed_i[key], np.ndarray): - float_slot.append(feed_i[key]) - self.has_numpy_input = True - else: - float_slot.append(feed_i[key]) - self.all_numpy_input = False - int_slot_batch.append(int_slot) - float_slot_batch.append(float_slot) - int_lod_slot_batch.append(int_lod_slot) - float_lod_slot_batch.append(float_lod_slot) - - self.profile_.record('py_prepro_1') - self.profile_.record('py_client_infer_0') - - result_batch_handle = self.predictorres_constructor() - if self.all_numpy_input: - res = self.client_handle_.numpy_predict( - float_slot_batch, float_feed_names, float_shape, - float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape, - int_lod_slot_batch, fetch_names, result_batch_handle, self.pid, - log_id) - elif self.has_numpy_input == False: - raise ValueError( - "Please make sure all of your inputs are numpy array") - else: - raise ValueError( - "Please make sure the inputs are all in list type or all in numpy.array type" - ) - - self.profile_.record('py_client_infer_1') - self.profile_.record('py_postpro_0') - - if res == -1: - return None - - multi_result_map = [] - model_engine_names = result_batch_handle.get_engine_names() - for mi, engine_name in enumerate(model_engine_names): - result_map = {} - # result map needs to be a numpy array - for i, name in enumerate(fetch_names): - if self.fetch_names_to_type_[name] == int64_type: - # result_map[name] will be py::array(numpy array) - result_map[name] = result_batch_handle.get_int64_by_name( - mi, name) - shape = result_batch_handle.get_shape(mi, name) - if result_map[name].size == 0: - raise ValueError( - "Failed to fetch, maybe the type of [{}]" - " is wrong, please check the model file".format( - name)) - result_map[name].shape = shape - if name in self.lod_tensor_set: - tmp_lod = result_batch_handle.get_lod(mi, name) - if np.size(tmp_lod) > 0: - result_map["{}.lod".format(name)] = tmp_lod - elif self.fetch_names_to_type_[name] == float32_type: - result_map[name] = result_batch_handle.get_float_by_name( - mi, name) - if result_map[name].size == 0: - raise ValueError( - "Failed to fetch, maybe the type of [{}]" - " is wrong, please check the model file".format( - name)) - shape = result_batch_handle.get_shape(mi, name) - result_map[name].shape = shape - if name in self.lod_tensor_set: - tmp_lod = result_batch_handle.get_lod(mi, name) - if np.size(tmp_lod) > 0: - result_map["{}.lod".format(name)] = tmp_lod - elif self.fetch_names_to_type_[name] == int32_type: - # result_map[name] will be py::array(numpy array) - result_map[name] = result_batch_handle.get_int32_by_name( - mi, name) - if result_map[name].size == 0: - raise ValueError( - "Failed to fetch, maybe the type of [{}]" - " is wrong, please check the model file".format( - name)) - shape = result_batch_handle.get_shape(mi, name) - result_map[name].shape = shape - if name in self.lod_tensor_set: - tmp_lod = result_batch_handle.get_lod(mi, name) - if np.size(tmp_lod) > 0: - result_map["{}.lod".format(name)] = tmp_lod - multi_result_map.append(result_map) - ret = None - if len(model_engine_names) == 1: - # If only one model result is returned, the format of ret is result_map - ret = multi_result_map[0] - else: - # If multiple model results are returned, the format of ret is {name: result_map} - ret = { - engine_name: multi_result_map[mi] - for mi, engine_name in enumerate(model_engine_names) - } - - self.profile_.record('py_postpro_1') - self.profile_.print_profile() - - # When using the A/B test, the tag of variant 
needs to be returned - return ret if not need_variant_tag else [ - ret, result_batch_handle.variant_tag() - ] - - def release(self): - self.client_handle_.destroy_predictor() - self.client_handle_ = None - - -class MultiLangClient(object): - def __init__(self): - self.channel_ = None - self.stub_ = None - self.rpc_timeout_s_ = 2 - self.profile_ = _Profiler() - - def add_variant(self, tag, cluster, variant_weight): - # TODO - raise Exception("cannot support ABtest yet") - - def set_rpc_timeout_ms(self, rpc_timeout): - if self.stub_ is None: - raise Exception("set timeout must be set after connect.") - if not isinstance(rpc_timeout, int): - # for bclient - raise ValueError("rpc_timeout must be int type.") - self.rpc_timeout_s_ = rpc_timeout / 1000.0 - timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest() - timeout_req.timeout_ms = rpc_timeout - resp = self.stub_.SetTimeout(timeout_req) - return resp.err_code == 0 - - def connect(self, endpoints): - # https://github.com/tensorflow/serving/issues/1382 - options = [('grpc.max_receive_message_length', 512 * 1024 * 1024), - ('grpc.max_send_message_length', 512 * 1024 * 1024), - ('grpc.lb_policy_name', 'round_robin')] - # TODO: weight round robin - g_endpoint = 'ipv4:{}'.format(','.join(endpoints)) - self.channel_ = grpc.insecure_channel(g_endpoint, options=options) - self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub( - self.channel_) - # get client model config - get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest( - ) - resp = self.stub_.GetClientConfig(get_client_config_req) - model_config_str = resp.client_config_str - self._parse_model_config(model_config_str) - - def _flatten_list(self, nested_list): - for item in nested_list: - if isinstance(item, (list, tuple)): - for sub_item in self._flatten_list(item): - yield sub_item - else: - yield item - - def _parse_model_config(self, model_config_str): - model_conf = m_config.GeneralModelConfig() - model_conf = google.protobuf.text_format.Merge(model_config_str, - model_conf) - self.feed_names_ = [var.alias_name for var in model_conf.feed_var] - self.feed_types_ = {} - self.feed_shapes_ = {} - self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] - self.fetch_types_ = {} - self.lod_tensor_set_ = set() - for i, var in enumerate(model_conf.feed_var): - self.feed_types_[var.alias_name] = var.feed_type - self.feed_shapes_[var.alias_name] = var.shape - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - else: - counter = 1 - for dim in self.feed_shapes_[var.alias_name]: - counter *= dim - for i, var in enumerate(model_conf.fetch_var): - self.fetch_types_[var.alias_name] = var.fetch_type - if var.is_lod_tensor: - self.lod_tensor_set_.add(var.alias_name) - - def _pack_inference_request(self, feed, fetch, is_python, log_id): - req = multi_lang_general_model_service_pb2.InferenceRequest() - req.fetch_var_names.extend(fetch) - req.is_python = is_python - req.log_id = log_id - feed_var_names = [] - for key in feed.keys(): - if '.lod' not in key: - feed_var_names.append(key) - req.feed_var_names.extend(feed_var_names) - inst = multi_lang_general_model_service_pb2.FeedInst() - for name in req.feed_var_names: - tensor = multi_lang_general_model_service_pb2.Tensor() - var = feed[name] - v_type = self.feed_types_[name] - if is_python: - data = None - if isinstance(var, list): - if v_type == 0: # int64 - data = np.array(var, dtype="int64") - elif v_type == 1: # float32 - data = np.array(var, 
dtype="float32") - elif v_type == 2: # int32 - data = np.array(var, dtype="int32") - else: - raise Exception("error tensor value type.") - elif isinstance(var, np.ndarray): - data = var - if v_type == 0: - if data.dtype != 'int64': - data = data.astype("int64") - elif v_type == 1: - if data.dtype != 'float32': - data = data.astype("float32") - elif v_type == 2: - if data.dtype != 'int32': - data = data.astype("int32") - else: - raise Exception("error tensor value type.") - else: - raise Exception("var must be list or ndarray.") - tensor.data = data.tobytes() - tensor.shape.extend(list(var.shape)) - if "{}.lod".format(name) in feed.keys(): - tensor.lod.extend(feed["{}.lod".format(name)]) - inst.tensor_array.append(tensor) - req.insts.append(inst) - return req - - def _unpack_inference_response(self, resp, fetch, is_python, - need_variant_tag): - if resp.err_code != 0: - return None - tag = resp.tag - multi_result_map = {} - for model_result in resp.outputs: - inst = model_result.insts[0] - result_map = {} - for i, name in enumerate(fetch): - var = inst.tensor_array[i] - v_type = self.fetch_types_[name] - if is_python: - if v_type == 0: # int64 - result_map[name] = np.frombuffer( - var.data, dtype="int64") - elif v_type == 1: # float32 - result_map[name] = np.frombuffer( - var.data, dtype="float32") - else: - raise Exception("error type.") - else: - if v_type == 0: # int64 - result_map[name] = np.array( - list(var.int64_data), dtype="int64") - elif v_type == 1: # float32 - result_map[name] = np.array( - list(var.float_data), dtype="float32") - else: - raise Exception("error type.") - result_map[name].shape = list(var.shape) - if name in self.lod_tensor_set_: - result_map["{}.lod".format(name)] = np.array(list(var.lod)) - multi_result_map[model_result.engine_name] = result_map - ret = None - if len(resp.outputs) == 1: - ret = list(multi_result_map.values())[0] - else: - ret = multi_result_map - - ret["serving_status_code"] = 0 - return ret if not need_variant_tag else [ret, tag] - - def _done_callback_func(self, fetch, is_python, need_variant_tag): - def unpack_resp(resp): - return self._unpack_inference_response(resp, fetch, is_python, - need_variant_tag) - - return unpack_resp - - def get_feed_names(self): - return self.feed_names_ - - def predict(self, - feed, - fetch, - batch=True, - need_variant_tag=False, - asyn=False, - is_python=True, - log_id=0): - if isinstance(feed, dict) is False: - raise ValueError("Type Error. 
grpc feed must be dict.") - if batch is False: - for key in feed: - if ".lod" not in key: - feed[key] = feed[key][np.newaxis, :] - if not asyn: - try: - self.profile_.record('py_prepro_0') - req = self._pack_inference_request( - feed, fetch, is_python=is_python, log_id=log_id) - self.profile_.record('py_prepro_1') - - self.profile_.record('py_client_infer_0') - resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_) - self.profile_.record('py_client_infer_1') - - self.profile_.record('py_postpro_0') - ret = self._unpack_inference_response( - resp, - fetch, - is_python=is_python, - need_variant_tag=need_variant_tag) - self.profile_.record('py_postpro_1') - self.profile_.print_profile() - return ret - except grpc.RpcError as e: - return {"serving_status_code": e.code()} - else: - req = self._pack_inference_request( - feed, fetch, is_python=is_python, log_id=log_id) - call_future = self.stub_.Inference.future( - req, timeout=self.rpc_timeout_s_) - return MultiLangPredictFuture( - call_future, - self._done_callback_func( - fetch, - is_python=is_python, - need_variant_tag=need_variant_tag)) - - -class MultiLangPredictFuture(object): - def __init__(self, call_future, callback_func): - self.call_future_ = call_future - self.callback_func_ = callback_func - - def result(self): - try: - resp = self.call_future_.result() - except grpc.RpcError as e: - return {"serving_status_code": e.code()} - return self.callback_func_(resp) - - def add_done_callback(self, fn): - def __fn__(call_future): - assert call_future == self.call_future_ - fn(self) - - self.call_future_.add_done_callback(__fn__) +from . import convert +from . import client +from . import version +from . import io +from . import utils +from . import metric + +from convert import * +from client import * +from version import * +from io import * +from utils import * +from metric import * + +__all__ = convert.__all__ \ + + client.__all__ \ + + version.__all__ \ + + io.__all__ \ + + utils.__all__ \ + + metric.__all__ + +__version__ = version.serving_client_version diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py new file mode 100644 index 000000000..a00fd298d --- /dev/null +++ b/python/paddle_serving_client/client.py @@ -0,0 +1,715 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=doc-string-missing + +import paddle_serving_client +import os +from .proto import sdk_configure_pb2 as sdk +from .proto import general_model_config_pb2 as m_config +import google.protobuf.text_format +import numpy as np +import requests +import json +import base64 +import time +import sys + +import grpc +from .proto import multi_lang_general_model_service_pb2 +sys.path.append( + os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) +from .proto import multi_lang_general_model_service_pb2_grpc + +int64_type = 0 +float32_type = 1 +int32_type = 2 +int_type = set([int64_type, int32_type]) +float_type = set([float32_type]) + + +class _NOPProfiler(object): + def record(self, name): + pass + + def print_profile(self): + pass + + +class _TimeProfiler(object): + def __init__(self): + self.pid = os.getpid() + self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid) + self.time_record = [self.print_head] + + def record(self, name): + self.time_record.append('{}:{} '.format( + name, int(round(time.time() * 1000000)))) + + def print_profile(self): + self.time_record.append('\n') + sys.stderr.write(''.join(self.time_record)) + self.time_record = [self.print_head] + + +_is_profile = int(os.environ.get('FLAGS_profile_client', 0)) +_Profiler = _TimeProfiler if _is_profile else _NOPProfiler + + +class SDKConfig(object): + def __init__(self): + self.sdk_desc = sdk.SDKConf() + self.tag_list = [] + self.cluster_list = [] + self.variant_weight_list = [] + self.rpc_timeout_ms = 20000 + self.load_balance_strategy = "la" + + def add_server_variant(self, tag, cluster, variant_weight): + self.tag_list.append(tag) + self.cluster_list.append(cluster) + self.variant_weight_list.append(variant_weight) + + def set_load_banlance_strategy(self, strategy): + self.load_balance_strategy = strategy + + def gen_desc(self, rpc_timeout_ms): + predictor_desc = sdk.Predictor() + predictor_desc.name = "general_model" + predictor_desc.service_name = \ + "baidu.paddle_serving.predictor.general_model.GeneralModelService" + predictor_desc.endpoint_router = "WeightedRandomRender" + predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join( + self.variant_weight_list) + + for idx, tag in enumerate(self.tag_list): + variant_desc = sdk.VariantConf() + variant_desc.tag = tag + variant_desc.naming_conf.cluster = "list://{}".format(",".join( + self.cluster_list[idx])) + predictor_desc.variants.extend([variant_desc]) + + self.sdk_desc.predictors.extend([predictor_desc]) + self.sdk_desc.default_variant_conf.tag = "default" + self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000 + self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms + self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2 + self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100 + self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1 + self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2 + self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled" + + self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default" + self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la" + + self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0 + self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20 + self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std" + 
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3 + + return self.sdk_desc + + +class Client(object): + def __init__(self): + self.feed_names_ = [] + self.fetch_names_ = [] + self.client_handle_ = None + self.feed_shapes_ = {} + self.feed_types_ = {} + self.feed_names_to_idx_ = {} + self.pid = os.getpid() + self.predictor_sdk_ = None + self.producers = [] + self.consumer = None + self.profile_ = _Profiler() + self.all_numpy_input = True + self.has_numpy_input = False + self.rpc_timeout_ms = 20000 + from .serving_client import PredictorRes + self.predictorres_constructor = PredictorRes + + def load_client_config(self, path): + from .serving_client import PredictorClient + model_conf = m_config.GeneralModelConfig() + f = open(path, 'r') + model_conf = google.protobuf.text_format.Merge( + str(f.read()), model_conf) + + # load configuraion here + # get feed vars, fetch vars + # get feed shapes, feed types + # map feed names to index + self.client_handle_ = PredictorClient() + self.client_handle_.init(path) + if "FLAGS_max_body_size" not in os.environ: + os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024) + read_env_flags = ["profile_client", "profile_server", "max_body_size"] + self.client_handle_.init_gflags([sys.argv[ + 0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) + self.feed_names_ = [var.alias_name for var in model_conf.feed_var] + self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] + self.feed_names_to_idx_ = {} + self.fetch_names_to_type_ = {} + self.fetch_names_to_idx_ = {} + self.lod_tensor_set = set() + self.feed_tensor_len = {} + self.key = None + + for i, var in enumerate(model_conf.feed_var): + self.feed_names_to_idx_[var.alias_name] = i + self.feed_types_[var.alias_name] = var.feed_type + self.feed_shapes_[var.alias_name] = var.shape + + if var.is_lod_tensor: + self.lod_tensor_set.add(var.alias_name) + else: + counter = 1 + for dim in self.feed_shapes_[var.alias_name]: + counter *= dim + self.feed_tensor_len[var.alias_name] = counter + for i, var in enumerate(model_conf.fetch_var): + self.fetch_names_to_idx_[var.alias_name] = i + self.fetch_names_to_type_[var.alias_name] = var.fetch_type + if var.is_lod_tensor: + self.lod_tensor_set.add(var.alias_name) + return + + def add_variant(self, tag, cluster, variant_weight): + if self.predictor_sdk_ is None: + self.predictor_sdk_ = SDKConfig() + self.predictor_sdk_.add_server_variant(tag, cluster, + str(variant_weight)) + + def set_rpc_timeout_ms(self, rpc_timeout): + if not isinstance(rpc_timeout, int): + raise ValueError("rpc_timeout must be int type.") + else: + self.rpc_timeout_ms = rpc_timeout + + def use_key(self, key_filename): + with open(key_filename, "rb") as f: + self.key = f.read() + + def get_serving_port(self, endpoints): + if self.key is not None: + req = json.dumps({"key": base64.b64encode(self.key).decode()}) + else: + req = json.dumps({}) + r = requests.post("http://" + endpoints[0], req) + result = r.json() + print(result) + if "endpoint_list" not in result: + raise ValueError("server not ready") + else: + endpoints = [ + endpoints[0].split(":")[0] + ":" + + str(result["endpoint_list"][0]) + ] + return endpoints + + def connect(self, endpoints=None, encryption=False): + # check whether current endpoint is available + # init from client config + # create predictor here + if endpoints is None: + if self.predictor_sdk_ is None: + raise ValueError( + "You must set the endpoints parameter or use add_variant function to create a variant." 
+ ) + else: + if encryption: + endpoints = self.get_serving_port(endpoints) + if self.predictor_sdk_ is None: + self.add_variant('default_tag_{}'.format(id(self)), endpoints, + 100) + else: + print( + "parameter endpoints({}) will not take effect, because you use the add_variant function.". + format(endpoints)) + sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms) + self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString( + )) + + def get_feed_names(self): + return self.feed_names_ + + def get_fetch_names(self): + return self.fetch_names_ + + def shape_check(self, feed, key): + if key in self.lod_tensor_set: + return + if isinstance(feed[key], + list) and len(feed[key]) != self.feed_tensor_len[key]: + raise ValueError("The shape of feed tensor {} not match.".format( + key)) + if type(feed[key]).__module__ == np.__name__ and np.size(feed[ + key]) != self.feed_tensor_len[key]: + #raise SystemExit("The shape of feed tensor {} not match.".format( + # key)) + pass + + def predict(self, + feed=None, + fetch=None, + batch=False, + need_variant_tag=False, + log_id=0): + self.profile_.record('py_prepro_0') + + if feed is None or fetch is None: + raise ValueError("You should specify feed and fetch for prediction") + + fetch_list = [] + if isinstance(fetch, str): + fetch_list = [fetch] + elif isinstance(fetch, list): + fetch_list = fetch + else: + raise ValueError("Fetch only accepts string and list of string") + + feed_batch = [] + if isinstance(feed, dict): + feed_batch.append(feed) + elif isinstance(feed, list): + feed_batch = feed + else: + raise ValueError("Feed only accepts dict and list of dict") + + int_slot_batch = [] + float_slot_batch = [] + int_feed_names = [] + float_feed_names = [] + int_shape = [] + int_lod_slot_batch = [] + float_lod_slot_batch = [] + float_shape = [] + + fetch_names = [] + counter = 0 + batch_size = len(feed_batch) + + for key in fetch_list: + if key in self.fetch_names_: + fetch_names.append(key) + + if len(fetch_names) == 0: + raise ValueError( + "Fetch names should not be empty or out of saved fetch list.") + return {} + + for i, feed_i in enumerate(feed_batch): + int_slot = [] + float_slot = [] + int_lod_slot = [] + float_lod_slot = [] + for key in feed_i: + if ".lod" not in key and key not in self.feed_names_: + raise ValueError("Wrong feed name: {}.".format(key)) + if ".lod" in key: + continue + #if not isinstance(feed_i[key], np.ndarray): + self.shape_check(feed_i, key) + if self.feed_types_[key] in int_type: + if i == 0: + int_feed_names.append(key) + shape_lst = [] + if batch == False: + feed_i[key] = feed_i[key][np.newaxis, :] + if isinstance(feed_i[key], np.ndarray): + shape_lst.extend(list(feed_i[key].shape)) + int_shape.append(shape_lst) + else: + int_shape.append(self.feed_shapes_[key]) + if "{}.lod".format(key) in feed_i: + int_lod_slot_batch.append(feed_i["{}.lod".format( + key)]) + else: + int_lod_slot_batch.append([]) + + if isinstance(feed_i[key], np.ndarray): + int_slot.append(feed_i[key]) + self.has_numpy_input = True + else: + int_slot.append(feed_i[key]) + self.all_numpy_input = False + + elif self.feed_types_[key] in float_type: + if i == 0: + float_feed_names.append(key) + shape_lst = [] + if batch == False: + feed_i[key] = feed_i[key][np.newaxis, :] + if isinstance(feed_i[key], np.ndarray): + shape_lst.extend(list(feed_i[key].shape)) + float_shape.append(shape_lst) + else: + float_shape.append(self.feed_shapes_[key]) + if "{}.lod".format(key) in feed_i: + float_lod_slot_batch.append(feed_i["{}.lod".format( + key)]) + 
else: + float_lod_slot_batch.append([]) + + if isinstance(feed_i[key], np.ndarray): + float_slot.append(feed_i[key]) + self.has_numpy_input = True + else: + float_slot.append(feed_i[key]) + self.all_numpy_input = False + int_slot_batch.append(int_slot) + float_slot_batch.append(float_slot) + int_lod_slot_batch.append(int_lod_slot) + float_lod_slot_batch.append(float_lod_slot) + + self.profile_.record('py_prepro_1') + self.profile_.record('py_client_infer_0') + + result_batch_handle = self.predictorres_constructor() + if self.all_numpy_input: + res = self.client_handle_.numpy_predict( + float_slot_batch, float_feed_names, float_shape, + float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape, + int_lod_slot_batch, fetch_names, result_batch_handle, self.pid, + log_id) + elif self.has_numpy_input == False: + raise ValueError( + "Please make sure all of your inputs are numpy array") + else: + raise ValueError( + "Please make sure the inputs are all in list type or all in numpy.array type" + ) + + self.profile_.record('py_client_infer_1') + self.profile_.record('py_postpro_0') + + if res == -1: + return None + + multi_result_map = [] + model_engine_names = result_batch_handle.get_engine_names() + for mi, engine_name in enumerate(model_engine_names): + result_map = {} + # result map needs to be a numpy array + for i, name in enumerate(fetch_names): + if self.fetch_names_to_type_[name] == int64_type: + # result_map[name] will be py::array(numpy array) + result_map[name] = result_batch_handle.get_int64_by_name( + mi, name) + shape = result_batch_handle.get_shape(mi, name) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == float32_type: + result_map[name] = result_batch_handle.get_float_by_name( + mi, name) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == int32_type: + # result_map[name] will be py::array(numpy array) + result_map[name] = result_batch_handle.get_int32_by_name( + mi, name) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + multi_result_map.append(result_map) + ret = None + if len(model_engine_names) == 1: + # If only one model result is returned, the format of ret is result_map + ret = multi_result_map[0] + else: + # If multiple model results are returned, the format of ret is {name: result_map} + ret = { + engine_name: multi_result_map[mi] + for mi, engine_name in enumerate(model_engine_names) + } + + self.profile_.record('py_postpro_1') + self.profile_.print_profile() + + # When using the A/B test, the tag of variant 
needs to be returned + return ret if not need_variant_tag else [ + ret, result_batch_handle.variant_tag() + ] + + def release(self): + self.client_handle_.destroy_predictor() + self.client_handle_ = None + + +class MultiLangClient(object): + def __init__(self): + self.channel_ = None + self.stub_ = None + self.rpc_timeout_s_ = 2 + self.profile_ = _Profiler() + + def add_variant(self, tag, cluster, variant_weight): + # TODO + raise Exception("cannot support ABtest yet") + + def set_rpc_timeout_ms(self, rpc_timeout): + if self.stub_ is None: + raise Exception("set timeout must be set after connect.") + if not isinstance(rpc_timeout, int): + # for bclient + raise ValueError("rpc_timeout must be int type.") + self.rpc_timeout_s_ = rpc_timeout / 1000.0 + timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest() + timeout_req.timeout_ms = rpc_timeout + resp = self.stub_.SetTimeout(timeout_req) + return resp.err_code == 0 + + def connect(self, endpoints): + # https://github.com/tensorflow/serving/issues/1382 + options = [('grpc.max_receive_message_length', 512 * 1024 * 1024), + ('grpc.max_send_message_length', 512 * 1024 * 1024), + ('grpc.lb_policy_name', 'round_robin')] + # TODO: weight round robin + g_endpoint = 'ipv4:{}'.format(','.join(endpoints)) + self.channel_ = grpc.insecure_channel(g_endpoint, options=options) + self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub( + self.channel_) + # get client model config + get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest( + ) + resp = self.stub_.GetClientConfig(get_client_config_req) + model_config_str = resp.client_config_str + self._parse_model_config(model_config_str) + + def _flatten_list(self, nested_list): + for item in nested_list: + if isinstance(item, (list, tuple)): + for sub_item in self._flatten_list(item): + yield sub_item + else: + yield item + + def _parse_model_config(self, model_config_str): + model_conf = m_config.GeneralModelConfig() + model_conf = google.protobuf.text_format.Merge(model_config_str, + model_conf) + self.feed_names_ = [var.alias_name for var in model_conf.feed_var] + self.feed_types_ = {} + self.feed_shapes_ = {} + self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] + self.fetch_types_ = {} + self.lod_tensor_set_ = set() + for i, var in enumerate(model_conf.feed_var): + self.feed_types_[var.alias_name] = var.feed_type + self.feed_shapes_[var.alias_name] = var.shape + if var.is_lod_tensor: + self.lod_tensor_set_.add(var.alias_name) + else: + counter = 1 + for dim in self.feed_shapes_[var.alias_name]: + counter *= dim + for i, var in enumerate(model_conf.fetch_var): + self.fetch_types_[var.alias_name] = var.fetch_type + if var.is_lod_tensor: + self.lod_tensor_set_.add(var.alias_name) + + def _pack_inference_request(self, feed, fetch, is_python, log_id): + req = multi_lang_general_model_service_pb2.InferenceRequest() + req.fetch_var_names.extend(fetch) + req.is_python = is_python + req.log_id = log_id + feed_var_names = [] + for key in feed.keys(): + if '.lod' not in key: + feed_var_names.append(key) + req.feed_var_names.extend(feed_var_names) + inst = multi_lang_general_model_service_pb2.FeedInst() + for name in req.feed_var_names: + tensor = multi_lang_general_model_service_pb2.Tensor() + var = feed[name] + v_type = self.feed_types_[name] + if is_python: + data = None + if isinstance(var, list): + if v_type == 0: # int64 + data = np.array(var, dtype="int64") + elif v_type == 1: # float32 + data = np.array(var, 
dtype="float32") + elif v_type == 2: # int32 + data = np.array(var, dtype="int32") + else: + raise Exception("error tensor value type.") + elif isinstance(var, np.ndarray): + data = var + if v_type == 0: + if data.dtype != 'int64': + data = data.astype("int64") + elif v_type == 1: + if data.dtype != 'float32': + data = data.astype("float32") + elif v_type == 2: + if data.dtype != 'int32': + data = data.astype("int32") + else: + raise Exception("error tensor value type.") + else: + raise Exception("var must be list or ndarray.") + tensor.data = data.tobytes() + tensor.shape.extend(list(var.shape)) + if "{}.lod".format(name) in feed.keys(): + tensor.lod.extend(feed["{}.lod".format(name)]) + inst.tensor_array.append(tensor) + req.insts.append(inst) + return req + + def _unpack_inference_response(self, resp, fetch, is_python, + need_variant_tag): + if resp.err_code != 0: + return None + tag = resp.tag + multi_result_map = {} + for model_result in resp.outputs: + inst = model_result.insts[0] + result_map = {} + for i, name in enumerate(fetch): + var = inst.tensor_array[i] + v_type = self.fetch_types_[name] + if is_python: + if v_type == 0: # int64 + result_map[name] = np.frombuffer( + var.data, dtype="int64") + elif v_type == 1: # float32 + result_map[name] = np.frombuffer( + var.data, dtype="float32") + else: + raise Exception("error type.") + else: + if v_type == 0: # int64 + result_map[name] = np.array( + list(var.int64_data), dtype="int64") + elif v_type == 1: # float32 + result_map[name] = np.array( + list(var.float_data), dtype="float32") + else: + raise Exception("error type.") + result_map[name].shape = list(var.shape) + if name in self.lod_tensor_set_: + result_map["{}.lod".format(name)] = np.array(list(var.lod)) + multi_result_map[model_result.engine_name] = result_map + ret = None + if len(resp.outputs) == 1: + ret = list(multi_result_map.values())[0] + else: + ret = multi_result_map + + ret["serving_status_code"] = 0 + return ret if not need_variant_tag else [ret, tag] + + def _done_callback_func(self, fetch, is_python, need_variant_tag): + def unpack_resp(resp): + return self._unpack_inference_response(resp, fetch, is_python, + need_variant_tag) + + return unpack_resp + + def get_feed_names(self): + return self.feed_names_ + + def predict(self, + feed, + fetch, + batch=True, + need_variant_tag=False, + asyn=False, + is_python=True, + log_id=0): + if isinstance(feed, dict) is False: + raise ValueError("Type Error. 
grpc feed must be dict.") + if batch is False: + for key in feed: + if ".lod" not in key: + feed[key] = feed[key][np.newaxis, :] + if not asyn: + try: + self.profile_.record('py_prepro_0') + req = self._pack_inference_request( + feed, fetch, is_python=is_python, log_id=log_id) + self.profile_.record('py_prepro_1') + + self.profile_.record('py_client_infer_0') + resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_) + self.profile_.record('py_client_infer_1') + + self.profile_.record('py_postpro_0') + ret = self._unpack_inference_response( + resp, + fetch, + is_python=is_python, + need_variant_tag=need_variant_tag) + self.profile_.record('py_postpro_1') + self.profile_.print_profile() + return ret + except grpc.RpcError as e: + return {"serving_status_code": e.code()} + else: + req = self._pack_inference_request( + feed, fetch, is_python=is_python, log_id=log_id) + call_future = self.stub_.Inference.future( + req, timeout=self.rpc_timeout_s_) + return MultiLangPredictFuture( + call_future, + self._done_callback_func( + fetch, + is_python=is_python, + need_variant_tag=need_variant_tag)) + + +class MultiLangPredictFuture(object): + def __init__(self, call_future, callback_func): + self.call_future_ = call_future + self.callback_func_ = callback_func + + def result(self): + try: + resp = self.call_future_.result() + except grpc.RpcError as e: + return {"serving_status_code": e.code()} + return self.callback_func_(resp) + + def add_done_callback(self, fn): + def __fn__(call_future): + assert call_future == self.call_future_ + fn(self) + + self.call_future_.add_done_callback(__fn__) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 8e6e8a093..257f19ddc 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -23,7 +23,6 @@ import base64 import time from multiprocessing import Pool, Process -from paddle_serving_server import serve_args from flask import Flask, request import sys if sys.version_info.major == 2: @@ -91,7 +90,58 @@ def serve_args(): help="container_id for authentication") return parser.parse_args() -def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-string-missing +def start_standard_model(serving_port): # pylint: disable=doc-string-missing + args = parse_args() + thread_num = args.thread + model = args.model + port = serving_port + workdir = args.workdir + device = args.device + mem_optim = args.mem_optim_off is False + ir_optim = args.ir_optim + max_body_size = args.max_body_size + use_mkl = args.use_mkl + use_encryption_model = args.use_encryption_model + use_multilang = args.use_multilang + + if model == "": + print("You must specify your serving model") + exit(-1) + + import paddle_serving_server as serving + op_maker = serving.OpMaker() + read_op = op_maker.create('general_reader') + general_infer_op = op_maker.create('general_infer') + general_response_op = op_maker.create('general_response') + + op_seq_maker = serving.OpSeqMaker() + op_seq_maker.add_op(read_op) + op_seq_maker.add_op(general_infer_op) + op_seq_maker.add_op(general_response_op) + + server = None + if use_multilang: + server = serving.MultiLangServer() + else: + server = serving.Server() + server.set_op_sequence(op_seq_maker.get_op_sequence()) + server.set_num_threads(thread_num) + server.set_memory_optimize(mem_optim) + server.set_ir_optimize(ir_optim) + server.use_mkl(use_mkl) + server.set_max_body_size(max_body_size) + server.set_port(port) + 
server.use_encryption_model(use_encryption_model) + if args.product_name != None: + server.set_product_name(args.product_name) + if args.container_id != None: + server.set_container_id(args.container_id) + + server.load_model_config(model) + server.prepare_server(workdir=workdir, port=port, device=device) + server.run_server() + +def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing workdir = args.workdir gpuid = int(gpuid) device = "gpu" @@ -113,7 +163,7 @@ def start_gpu_card_model(port, args, index = 0, gpuid): # pylint: disable=doc-s print("You must specify your serving model") exit(-1) - import paddle_serving_server_gpu as serving + import paddle_serving_server as serving op_maker = serving.OpMaker() read_op = op_maker.create('general_reader') general_infer_op = op_maker.create('general_infer') diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py index 65261dfa3..7f61b7392 100644 --- a/python/pipeline/local_service_handler.py +++ b/python/pipeline/local_service_handler.py @@ -15,8 +15,8 @@ import os import logging import multiprocessing -#from paddle_serving_server_gpu import OpMaker, OpSeqMaker -#from paddle_serving_server_gpu import Server as GpuServer +#from paddle_serving_server import OpMaker, OpSeqMaker +#from paddle_serving_server import Server as GpuServer #from paddle_serving_server import Server as CpuServer from . import util #from paddle_serving_app.local_predict import LocalPredictor @@ -235,7 +235,7 @@ def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim, server = Server() else: #gpu or arm - from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server + from paddle_serving_server import OpMaker, OpSeqMaker, Server op_maker = OpMaker() read_op = op_maker.create('general_reader') general_infer_op = op_maker.create('general_infer') diff --git a/python/setup.py.server_gpu.in b/python/setup.py.server_gpu.in index 468d5401f..7e10da1df 100644 --- a/python/setup.py.server_gpu.in +++ b/python/setup.py.server_gpu.in @@ -19,7 +19,7 @@ from __future__ import print_function from setuptools import setup, Distribution, Extension from setuptools import find_packages from setuptools import setup -from paddle_serving_server_gpu.version import serving_server_version, cuda_version +from paddle_serving_server.version import serving_server_version, cuda_version import util if cuda_version != "trt": @@ -27,34 +27,34 @@ if cuda_version != "trt": max_version, mid_version, min_version = util.python_version() # gen pipeline proto code -util.gen_pipeline_code("paddle_serving_server_gpu") +util.gen_pipeline_code("paddle_serving_server") REQUIRED_PACKAGES = [ 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2', 'flask >= 1.1.1', 'func_timeout', 'pyyaml' ] -packages=['paddle_serving_server_gpu', - 'paddle_serving_server_gpu.proto', - 'paddle_serving_server_gpu.pipeline', - 'paddle_serving_server_gpu.pipeline.proto', - 'paddle_serving_server_gpu.pipeline.gateway', - 'paddle_serving_server_gpu.pipeline.gateway.proto'] +packages=['paddle_serving_server', + 'paddle_serving_server.proto', + 'paddle_serving_server.pipeline', + 'paddle_serving_server.pipeline.proto', + 'paddle_serving_server.pipeline.gateway', + 'paddle_serving_server.pipeline.gateway.proto'] -package_dir={'paddle_serving_server_gpu': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu', - 'paddle_serving_server_gpu.proto': - 
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto', - 'paddle_serving_server_gpu.pipeline': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline', - 'paddle_serving_server_gpu.pipeline.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto', - 'paddle_serving_server_gpu.pipeline.gateway': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway', - 'paddle_serving_server_gpu.pipeline.gateway.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway/proto'} +package_dir={'paddle_serving_server': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server', + 'paddle_serving_server.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto', + 'paddle_serving_server.pipeline': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline', + 'paddle_serving_server.pipeline.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto', + 'paddle_serving_server.pipeline.gateway': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway', + 'paddle_serving_server.pipeline.gateway.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway/proto'} -package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so'],} +package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],} setup( name='paddle-serving-server-gpu', From ab371377178bea283b1247b7eec70cd8496aeb0f Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 11:39:02 +0000 Subject: [PATCH 06/12] update --- python/paddle_serving_server/__init__.py | 21 +++++++++++---------- python/paddle_serving_server/server.py | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index e7633b23b..6a900ebec 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -13,19 +13,20 @@ # limitations under the License. # pylint: disable=doc-string-missing -from . import dag from . import monitor from . import rpc_service from . import serve -from . import web_service from . 
import version -from dag import * -from monitor import * -from rpc_service import * -from serve import * -from web_service import * -from version import * +__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"] -SERVER_VERSION = "0.0.0" -__version__ = SERVER_VERSION \ No newline at end of file +from paddle_serving_server import ( + version, + server, + serve, + monitor, + rpc_service, + dag, +) + +__version__ = version.serving_server_version diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index ec0c545b5..e0686662f 100644 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -582,4 +582,4 @@ def run_server(self): server.add_insecure_port('[::]:{}'.format(self.gport_)) server.start() p_bserver.join() - server.wait_for_termination() \ No newline at end of file + server.wait_for_termination() From b5ad75097dae7064dfbe567f7b387533710e2532 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Thu, 11 Mar 2021 12:38:22 +0000 Subject: [PATCH 07/12] update __init__.py for client --- python/paddle_serving_client/__init__.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index f0a7c03ac..f46f435ba 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -13,25 +13,6 @@ # limitations under the License. # pylint: disable=doc-string-missing -from . import convert -from . import client from . import version -from . import io -from . import utils -from . import metric - -from convert import * -from client import * -from version import * -from io import * -from utils import * -from metric import * - -__all__ = convert.__all__ \ - + client.__all__ \ - + version.__all__ \ - + io.__all__ \ - + utils.__all__ \ - + metric.__all__ __version__ = version.serving_client_version From 9be48755b6242844b1c07066b1006993b80c5599 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Mon, 15 Mar 2021 10:53:39 +0000 Subject: [PATCH 08/12] update --- python/CMakeLists.txt | 8 +- python/gen_version.py | 8 +- python/paddle_serving_client/__init__.py | 3 + python/paddle_serving_server/__init__.py | 6 +- python/paddle_serving_server/serve.py | 6 +- python/paddle_serving_server/server.py | 99 +++++++++++++----------- python/paddle_serving_server/version.py | 4 +- python/requirements.txt | 7 +- python/requirements_mac.txt | 7 +- python/setup.py.server.in | 12 ++- python/setup.py.server_gpu.in | 91 ---------------------- 11 files changed, 95 insertions(+), 156 deletions(-) delete mode 100644 python/setup.py.server_gpu.in diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d12af3ad0..16bdbbbe8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -7,6 +7,12 @@ if (CLIENT) endif() if (SERVER) + if (WITH_GPU) + set(SERVER_PACKAGE_NAME "paddle-serving-server-gpu") + elseif(WITH_XPU) + set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu") + endif() + elseif(WITH_XPU) file(INSTALL pipeline DESTINATION paddle_serving_server) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) set(PY_FILES ${SERVING_SERVER_PY_FILES}) @@ -74,7 +80,7 @@ if (SERVER) endif() if(WITH_LITE) - set(VERSION_SUFFIX xpu) + set(VERSION_SUFFIX 2) endif() add_custom_command( diff --git a/python/gen_version.py b/python/gen_version.py index ed812a924..6a266dd20 100644 --- a/python/gen_version.py +++ b/python/gen_version.py @@ -34,10 +34,16 @@ def update_info(file_name, feature, info): 
f.write(new_str) -if len(sys.argv) > 2: +if len(sys.argv) > 2 and len(sys.argv[2]) > 0: update_info("paddle_serving_server/version.py", "version_suffix", sys.argv[2]) +package_name = '${SERVER_PACKAGE_NAME}' +if package_name.endswith('gpu'): + update_info("paddle_serving_server/version.py", "device_type", "1") +elif package_name.endswith('xpu'): + update_info("paddle_serving_server/version.py", "device_type", "2") + path = "paddle_serving_" + sys.argv[1] commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD']) update_info(path + "/version.py", "commit_id", commit_id) diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index f46f435ba..23745bce5 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -15,4 +15,7 @@ from . import version +from . import client +from .client import * + __version__ = version.serving_client_version diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 6a900ebec..b74caf7d6 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -26,7 +26,9 @@ serve, monitor, rpc_service, - dag, -) + dag, ) + +from .dag import * +from .server import * __version__ = version.serving_server_version diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 257f19ddc..3f7ea5c75 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -30,6 +30,7 @@ elif sys.version_info.major == 3: from http.server import BaseHTTPRequestHandler, HTTPServer + def serve_args(): parser = argparse.ArgumentParser("serve") parser.add_argument( @@ -90,6 +91,7 @@ def serve_args(): help="container_id for authentication") return parser.parse_args() + def start_standard_model(serving_port): # pylint: disable=doc-string-missing args = parse_args() thread_num = args.thread @@ -141,6 +143,7 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing server.prepare_server(workdir=workdir, port=port, device=device) server.run_server() + def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing workdir = args.workdir gpuid = int(gpuid) @@ -189,7 +192,6 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin if args.use_lite: server.set_lite() - device = "arm" server.set_device(device) if args.use_xpu: @@ -230,7 +232,7 @@ def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-mis else: env_gpus = [] if args.use_lite: - print("run arm server.") + print("run using paddle-lite.") start_gpu_card_model(-1, -1, args) elif len(gpus) <= 0: print("gpu_ids not set, going to run cpu service.") diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index e0686662f..bd33bfdcc 100644 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -1,10 +1,26 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. import os import tarfile import socket import paddle_serving_server as paddle_serving_server +from .proto import server_configure_pb2 as server_sdk +from .proto import general_model_config_pb2 as m_config +import google.protobuf.text_format import time -from .version import serving_server_version +from .version import serving_server_version, version_suffix, device_type from contextlib import closing import argparse @@ -12,6 +28,7 @@ if sys.platform.startswith('win') is False: import fcntl import shutil +import platform import numpy as np import grpc import sys @@ -19,6 +36,7 @@ from multiprocessing import Pool, Process from concurrent import futures + class Server(object): def __init__(self): self.server_handle_ = None @@ -144,32 +162,18 @@ def _prepare_engine(self, model_config_paths, device, use_encryption_model): engine.runtime_thread_num = 0 engine.batch_infer_size = 0 engine.enable_batch_align = 0 - engine.model_data_path = model_config_path + engine.model_dir = model_config_path engine.enable_memory_optimization = self.memory_optimization engine.enable_ir_optimization = self.ir_optimization - engine.static_optimization = False - engine.force_update_static_cache = False engine.use_trt = self.use_trt - if os.path.exists('{}/__params__'.format(model_config_path)): - suffix = "" - else: - suffix = "_DIR" - if device == "arm": - engine.use_lite = self.use_lite - engine.use_xpu = self.use_xpu - engine.type = "PaddleInferenceEngine" - # if device == "cpu": - # if use_encryption_model: - # engine.type = "FLUID_CPU_ANALYSIS_ENCRPT" - # else: - # engine.type = "FLUID_CPU_ANALYSIS" + suffix - # elif device == "gpu": - # if use_encryption_model: - # engine.type = "FLUID_GPU_ANALYSIS_ENCRPT" - # else: - # engine.type = "FLUID_GPU_ANALYSIS" + suffix - # elif device == "arm": - # engine.type = "FLUID_ARM_ANALYSIS" + suffix + engine.use_lite = self.use_lite + engine.use_xpu = self.use_xpu + if not os.path.exists('{}/__params__'.format(model_config_path)): + engine.combined_model = True + if use_encryption_model: + engine.encrypted_model = True + engine.type = "PaddleInfer" + self.model_toolkit_conf.engines.extend([engine]) def _prepare_infer_service(self, port): @@ -259,7 +263,7 @@ def load_model_config(self, model_config_paths): str(f.read()), self.model_conf) # check config here # print config here - + def use_mkl(self, flag): self.mkl_flag = flag @@ -272,15 +276,27 @@ def get_device_version(self): avx_flag = True if avx_flag: if mkl_flag: - device_version = "serving-cpu-avx-mkl-" + device_version = "cpu-avx-mkl" else: - device_version = "serving-cpu-avx-openblas-" + device_version = "cpu-avx-openblas" else: if mkl_flag: print( "Your CPU does not support AVX, server will running with noavx-openblas mode." 
) - device_version = "serving-cpu-noavx-openblas-" + device_version = "cpu-noavx-openblas" + return device_version + + def get_serving_bin_name(self): + if device_type == "0": + device_version = self.get_device_version() + elif device_type == "1": + if version_suffix == "101" or version_suffix == "102": + device_version = "gpu-" + version_suffix + else: + device_version = "gpu-cuda" + version_suffix + elif device_type == "2": + device_version = "xpu-" + platform.machine() return device_version def download_bin(self): @@ -289,21 +305,10 @@ def download_bin(self): #acquire lock version_file = open("{}/version.py".format(self.module_path), "r") - import re - for line in version_file.readlines(): - # to add, version_suffix - if re.match("cuda_version", line): - cuda_version = line.split("\"")[1] - if cuda_version == "101" or cuda_version == "102": - device_version = "serving-gpu-" + cuda_version + "-" - elif cuda_version == "arm" or cuda_version == "arm-xpu": - device_version = "serving-" + cuda_version + "-" - else: - device_version = "serving-gpu-cuda" + cuda_version + "-" - - folder_name = device_version + serving_server_version - tar_name = folder_name + ".tar.gz" - bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name + + tar_name = self.get_serving_bin_name() + ".tar.gz" + bin_url = "https://paddle-serving.bj.bcebos.com/bin/serving-%s-%s.tar.gz" % ( + self.get_serving_bin_name(), serving_server_version) self.server_path = os.path.join(self.module_path, folder_name) download_flag = "{}/{}.is_download".format(self.module_path, @@ -346,8 +351,7 @@ def download_bin(self): version_file.close() os.chdir(self.cur_path) self.bin_path = self.server_path + "/serving" - - + def prepare_server(self, workdir=None, port=9292, @@ -466,6 +470,7 @@ def run_server(self): os.system(command) + class MultiLangServer(object): def __init__(self): self.bserver_ = Server() @@ -508,10 +513,10 @@ def set_op_sequence(self, op_seq): def set_op_graph(self, op_graph): self.bserver_.set_op_graph(op_graph) - + def use_mkl(self, flag): self.bserver_.use_mkl(flag) - + def set_memory_optimize(self, flag=False): self.bserver_.set_memory_optimize(flag) diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py index b774c2237..ea643ee24 100644 --- a/python/paddle_serving_server/version.py +++ b/python/paddle_serving_server/version.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Paddle Serving Client version string """ +""" Paddle Serving Server version string """ serving_client_version = "0.0.0" serving_server_version = "0.0.0" module_proto_version = "0.0.0" +version_suffix = "" +device_type = "0" cuda_version = "9" commit_id = "" diff --git a/python/requirements.txt b/python/requirements.txt index 6771d1ade..bfe418f5d 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -2,14 +2,15 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" shapely==1.7.0 wheel>=0.34.0, <0.35.0 setuptools>=44.1.0 -opencv-python==4.2.0.32 google>=2.0.3 -opencv-python==4.2.0.32 protobuf>=3.12.2 grpcio-tools>=1.28.1 grpcio>=1.28.1 func-timeout>=4.3.5 pyyaml>=1.3.0 -sentencepiece==0.1.92 flask>=1.1.2 ujson>=2.0.3 +sentencepiece==0.1.92; platform_machine != "aarch64" +sentencepiece; platform_machine == "aarch64" +opencv-python==4.2.0.32; platform_machine != "aarch64" +opencv-python; platform_machine == "aarch64" diff --git a/python/requirements_mac.txt b/python/requirements_mac.txt index df05a2908..09a32ed0f 100644 --- a/python/requirements_mac.txt +++ b/python/requirements_mac.txt @@ -2,14 +2,13 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" shapely==1.7.0 wheel>=0.34.0, <0.35.0 setuptools>=44.1.0 -opencv-python==4.2.0.32 google>=2.0.3 opencv-python==4.2.0.32 protobuf>=3.12.2 -grpcio-tools>=1.33.2 -grpcio>=1.33.2 func-timeout>=4.3.5 pyyaml>=1.3.0 -sentencepiece==0.1.83 flask>=1.1.2 ujson>=2.0.3 +grpcio-tools>=1.33.2 +grpcio>=1.33.2 +sentencepiece==0.1.83 diff --git a/python/setup.py.server.in b/python/setup.py.server.in index 07e8dc380..b9bc1c1e7 100644 --- a/python/setup.py.server.in +++ b/python/setup.py.server.in @@ -19,11 +19,15 @@ from __future__ import print_function from setuptools import setup, Distribution, Extension from setuptools import find_packages from setuptools import setup -from paddle_serving_server.version import serving_server_version +from paddle_serving_server.version import serving_server_version, version_suffix import util -max_version, mid_version, min_version = util.python_version() +package_version = serving_server_version.replace('-', '') +if version_suffix != "": + version_suffix = "post" + version_suffix + package_version = package_version + "." + version_suffix +max_version, mid_version, min_version = util.python_version() # gen pipeline proto code util.gen_pipeline_code("paddle_serving_server") @@ -55,8 +59,8 @@ package_dir={'paddle_serving_server': package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],} setup( - name='paddle-serving-server', - version=serving_server_version.replace('-', ''), + name='${SERVER_PACKAGE_NAME}', + version= package_version, description= ('Paddle Serving Package for saved model with PaddlePaddle'), url='https://github.com/PaddlePaddle/Serving', diff --git a/python/setup.py.server_gpu.in b/python/setup.py.server_gpu.in deleted file mode 100644 index 7e10da1df..000000000 --- a/python/setup.py.server_gpu.in +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Setup for pip package.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from setuptools import setup, Distribution, Extension -from setuptools import find_packages -from setuptools import setup -from paddle_serving_server.version import serving_server_version, cuda_version -import util - -if cuda_version != "trt": - cuda_version = "post" + cuda_version - -max_version, mid_version, min_version = util.python_version() -# gen pipeline proto code -util.gen_pipeline_code("paddle_serving_server") - -REQUIRED_PACKAGES = [ - 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2', - 'flask >= 1.1.1', 'func_timeout', 'pyyaml' -] - -packages=['paddle_serving_server', - 'paddle_serving_server.proto', - 'paddle_serving_server.pipeline', - 'paddle_serving_server.pipeline.proto', - 'paddle_serving_server.pipeline.gateway', - 'paddle_serving_server.pipeline.gateway.proto'] - -package_dir={'paddle_serving_server': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server', - 'paddle_serving_server.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto', - 'paddle_serving_server.pipeline': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline', - 'paddle_serving_server.pipeline.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto', - 'paddle_serving_server.pipeline.gateway': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway', - 'paddle_serving_server.pipeline.gateway.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway/proto'} - -package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],} - -setup( - name='paddle-serving-server-gpu', - version=serving_server_version.replace('-', '') + "." + cuda_version, - description= - ('Paddle Serving Package for saved model with PaddlePaddle'), - url='https://github.com/PaddlePaddle/Serving', - author='PaddlePaddle Author', - author_email='guru4elephant@gmail.com', - install_requires=REQUIRED_PACKAGES, - packages=packages, - package_data=package_data, - package_dir=package_dir, - # PyPI package information. 
- classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - license='Apache 2.0', - keywords=('paddle-serving serving-server deployment industrial easy-to-use')) From 46aa1fad3341e03810a2b90bda7a5657e7b0ef44 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Mon, 15 Mar 2021 11:37:57 +0000 Subject: [PATCH 09/12] fix --- python/paddle_serving_server/server.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index bd33bfdcc..daa315854 100644 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -168,11 +168,13 @@ def _prepare_engine(self, model_config_paths, device, use_encryption_model): engine.use_trt = self.use_trt engine.use_lite = self.use_lite engine.use_xpu = self.use_xpu - if not os.path.exists('{}/__params__'.format(model_config_path)): + if os.path.exists('{}/__params__'.format(model_config_path)): engine.combined_model = True + else: + engine.combined_model = False if use_encryption_model: engine.encrypted_model = True - engine.type = "PaddleInfer" + engine.type = "PADDLE_INFER" self.model_toolkit_conf.engines.extend([engine]) @@ -306,9 +308,11 @@ def download_bin(self): #acquire lock version_file = open("{}/version.py".format(self.module_path), "r") - tar_name = self.get_serving_bin_name() + ".tar.gz" - bin_url = "https://paddle-serving.bj.bcebos.com/bin/serving-%s-%s.tar.gz" % ( - self.get_serving_bin_name(), serving_server_version) + folder_name = "serving-%s-%s" % (self.get_serving_bin_name(), + serving_server_version) + tar_name = "%s.tar.gz" % folder_name + bin_url = "https://paddle-serving.bj.bcebos.com/bin/%s" % tar_name + self.server_path = os.path.join(self.module_path, folder_name) download_flag = "{}/{}.is_download".format(self.module_path, From 8ad5abb28efe5b5dcbb34cede597463c719d8d11 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Tue, 16 Mar 2021 13:27:27 +0000 Subject: [PATCH 10/12] fix cmake bug --- python/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 16bdbbbe8..463a26b6a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -12,7 +12,6 @@ if (SERVER) elseif(WITH_XPU) set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu") endif() - elseif(WITH_XPU) file(INSTALL pipeline DESTINATION paddle_serving_server) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) set(PY_FILES ${SERVING_SERVER_PY_FILES}) From 2d57e65ec9713fa9b588470b7669fce20dd4cc9a Mon Sep 17 00:00:00 2001 From: zhangjun Date: Wed, 17 Mar 2021 14:33:01 +0800 Subject: [PATCH 11/12] fix app setup.py for aarch64 --- python/setup.py.app.in | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/setup.py.app.in b/python/setup.py.app.in index e65e69656..a9b58a118 100644 --- 
a/python/setup.py.app.in +++ b/python/setup.py.app.in @@ -32,8 +32,13 @@ if '${PACK}' == 'ON': REQUIRED_PACKAGES = [ - 'six >= 1.10.0', 'sentencepiece<=0.1.83', 'opencv-python<=4.2.0.32', 'pillow', - 'pyclipper', 'shapely' + 'six >= 1.10.0', + 'pillow', + 'pyclipper', 'shapely', + 'sentencepiece<=0.1.83; platform_machine != "aarch64"', + 'sentencepiece; platform_machine == "aarch64"', + 'opencv-python<=4.2.0.32; platform_machine != "aarch64"', + 'opencv-python; platform_machine == "aarch64"', ] packages=['paddle_serving_app', From cd6f901a9c007db90cd8d760fa1fa400f7d59307 Mon Sep 17 00:00:00 2001 From: zhangjun Date: Wed, 17 Mar 2021 16:54:50 +0800 Subject: [PATCH 12/12] mkdir fix --- python/paddle_serving_server/serve.py | 2 +- python/paddle_serving_server/server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 3f7ea5c75..248ff583d 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -233,7 +233,7 @@ def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-mis env_gpus = [] if args.use_lite: print("run using paddle-lite.") - start_gpu_card_model(-1, -1, args) + start_gpu_card_model(-1, -1, serving_port, args) elif len(gpus) <= 0: print("gpu_ids not set, going to run cpu service.") start_gpu_card_model(-1, -1, serving_port, args) diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index daa315854..7ed32fed1 100644 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -364,9 +364,9 @@ def prepare_server(self, cube_conf=None): if workdir == None: workdir = "./tmp" - os.system("mkdir {}".format(workdir)) + os.system("mkdir -p {}".format(workdir)) else: - os.system("mkdir {}".format(workdir)) + os.system("mkdir -p {}".format(workdir)) os.system("touch {}/fluid_time_file".format(workdir)) if not self.port_is_available(port):
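The serve.py hunk brings the paddle-lite branch in line with the CPU/GPU branches by passing serving_port through to start_gpu_card_model, and the final server.py hunk makes workdir creation idempotent: mkdir -p succeeds whether or not the directory already exists, so repeated server starts no longer fail on an existing workdir. For illustration only (the patch itself keeps the os.system call, and this assumes Python 3's exist_ok flag), the same guarantee in pure Python would look roughly like this:

import os

def ensure_workdir(workdir="./tmp"):
    # Sketch of `mkdir -p {workdir}` followed by `touch {workdir}/fluid_time_file`;
    # not code from the patch.
    os.makedirs(workdir, exist_ok=True)      # no error if the directory exists
    open(os.path.join(workdir, "fluid_time_file"), "a").close()
    return workdir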