From 4519cfa9cf7da9dab17a061a713245c2ad9fdf25 Mon Sep 17 00:00:00 2001 From: ShiningZhang Date: Thu, 21 Apr 2022 20:19:15 +0800 Subject: [PATCH 1/3] set trt dynamic shape for ocr --- core/configure/proto/server_configure.proto | 4 ++ .../op/general_detection_op.cpp | 2 +- examples/C++/PaddleOCR/ocr/README.md | 6 +- examples/C++/PaddleOCR/ocr/README_CN.md | 6 +- .../C++/PaddleOCR/ocr/det_debugger_server.py | 8 +-- examples/C++/PaddleOCR/ocr/det_web_server.py | 6 +- examples/C++/PaddleOCR/ocr/ocr_cpp_client.py | 8 +-- .../C++/PaddleOCR/ocr/ocr_debugger_server.py | 12 ++-- examples/C++/PaddleOCR/ocr/ocr_web_client.py | 2 +- examples/C++/PaddleOCR/ocr/ocr_web_server.py | 12 ++-- .../C++/PaddleOCR/ocr/rec_debugger_server.py | 8 +-- examples/C++/PaddleOCR/ocr/rec_web_client.py | 2 +- examples/C++/PaddleOCR/ocr/rec_web_server.py | 8 +-- .../paddle/include/paddle_engine.h | 53 +++++++++++++++++- python/paddle_serving_server/serve.py | 55 +++++++++++++++++++ python/paddle_serving_server/server.py | 28 ++++++++++ 16 files changed, 178 insertions(+), 42 deletions(-) diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto index c974f0107..e6678f879 100755 --- a/core/configure/proto/server_configure.proto +++ b/core/configure/proto/server_configure.proto @@ -65,6 +65,10 @@ message EngineDesc { optional int32 batch_infer_size = 31 [ default = 32 ]; optional bool enable_overrun = 32 [ default = false ]; optional bool allow_split_request = 33 [ default = true ]; + optional int32 min_subgraph_size = 34 [ default = 3 ]; + map min_input_shape = 35; + map max_input_shape = 36; + map opt_input_shape = 37; }; // model_toolkit conf diff --git a/core/general-server/op/general_detection_op.cpp b/core/general-server/op/general_detection_op.cpp index b62a2d254..6a4fe15f2 100644 --- a/core/general-server/op/general_detection_op.cpp +++ b/core/general-server/op/general_detection_op.cpp @@ -244,7 +244,7 @@ int GeneralDetectionOp::inference() { databuf_char_out = reinterpret_cast(databuf_data_out); paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); paddle::PaddleTensor tensor_out; - tensor_out.name = "image"; + tensor_out.name = "x"; tensor_out.dtype = paddle::PaddleDType::FLOAT32; tensor_out.shape = output_shape; tensor_out.data = paddleBuf; diff --git a/examples/C++/PaddleOCR/ocr/README.md b/examples/C++/PaddleOCR/ocr/README.md index 28849bf32..12f0fd6cd 100755 --- a/examples/C++/PaddleOCR/ocr/README.md +++ b/examples/C++/PaddleOCR/ocr/README.md @@ -4,9 +4,9 @@ ## Get Model ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` @@ -108,7 +108,7 @@ python3 rec_web_client.py When a service starts the concatenation of two models, it only needs to pass in the relative path of the model folder in order after `--model`, and the custom C++ OP class name after `--op`. The order of the model after `--model` and the class name after `--OP` needs to correspond. Here, it is assumed that we have defined the two OPs as GeneralDetectionOp and GeneralRecOp respectively, The script code is as follows: ```python #One service starts the concatenation of two models -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #ocr_det_model correspond to GeneralDetectionOp, ocr_rec_model correspond to GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/README_CN.md b/examples/C++/PaddleOCR/ocr/README_CN.md index d9671115e..a737531d7 100755 --- a/examples/C++/PaddleOCR/ocr/README_CN.md +++ b/examples/C++/PaddleOCR/ocr/README_CN.md @@ -4,9 +4,9 @@ ## 获取模型 ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` ## 获取数据集(可选) @@ -106,7 +106,7 @@ python3 rec_web_client.py 一个服务启动两个模型串联,只需要在`--model后依次按顺序传入模型文件夹的相对路径`,且需要在`--op后依次传入自定义C++OP类名称`,其中--model后面的模型与--op后面的类名称的顺序需要对应,`这里假设我们已经定义好了两个OP分别为GeneralDetectionOp和GeneralRecOp`,则脚本代码如下: ```python #一个服务启动多模型串联 -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #多模型串联 ocr_det_model对应GeneralDetectionOp ocr_rec_model对应GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/det_debugger_server.py b/examples/C++/PaddleOCR/ocr/det_debugger_server.py index 5b40fe937..6679ee0f4 100644 --- a/examples/C++/PaddleOCR/ocr/det_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/det_debugger_server.py @@ -47,18 +47,18 @@ def init_det(self): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, self.new_w = det_img.shape return { - "image": det_img[np.newaxis, :].copy() - }, ["concat_1.tmp_0"], True + "x": det_img[np.newaxis, :].copy() + }, ["save_infer_model/scale_0.tmp_1"], True def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/det_web_server.py b/examples/C++/PaddleOCR/ocr/det_web_server.py index d38686e5a..d52f4b447 100644 --- a/examples/C++/PaddleOCR/ocr/det_web_server.py +++ b/examples/C++/PaddleOCR/ocr/det_web_server.py @@ -47,17 +47,17 @@ def init_det(self): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, self.new_w = det_img.shape print(det_img) - return {"image": det_img}, ["concat_1.tmp_0"], False + return {"x": det_img}, ["save_infer_model/scale_0.tmp_1"], False def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py index b3187f50e..507971c36 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py @@ -42,13 +42,11 @@ def cv2_to_base64(image): image_data = file.read() image = cv2_to_base64(image_data) fetch_map = client.predict( - feed={"image": image}, - fetch=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"], + feed={"x": image}, + fetch=["save_infer_model/scale_0.tmp_1"], batch=True) result = {} - result["score"] = fetch_map["softmax_0.tmp_0"] - del fetch_map["softmax_0.tmp_0"] - rec_res = OCRReader().postprocess(fetch_map, with_score=False) + rec_res = OCRReader().postprocess_ocrv2(fetch_map, with_score=False) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py index 88dd94a82..bb10dba44 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py @@ -48,7 +48,7 @@ def init_det_debugger(self, det_model_config): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape @@ -57,7 +57,7 @@ def preprocess(self, feed=[], fetch=[]): det_img = det_img[np.newaxis, :] det_img = det_img.copy() det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=True) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True) filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ "thresh": 0.3, @@ -68,7 +68,7 @@ def preprocess(self, feed=[], fetch=[]): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -88,12 +88,12 @@ def preprocess(self, feed=[], fetch=[]): for id, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[id] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_client.py b/examples/C++/PaddleOCR/ocr/ocr_web_client.py index ce96a8bbc..91620e8ed 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_client.py @@ -34,7 +34,7 @@ def cv2_to_base64(image): with open(os.path.join(test_img_dir, img_file), 'rb') as file: image_data1 = file.read() image = cv2_to_base64(image_data1) - data = {"feed": [{"image": image}], "fetch": ["res"]} + data = {"feed": [{"x": image}], "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_server.py b/examples/C++/PaddleOCR/ocr/ocr_web_server.py index 58fc850c9..2273c7e1d 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_server.py @@ -44,13 +44,13 @@ def init_det_client(self, det_port, det_client_config): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape det_img = self.det_preprocess(im) det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=False) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=False) _, new_h, new_w = det_img.shape filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ @@ -62,7 +62,7 @@ def preprocess(self, feed=[], fetch=[]): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -78,12 +78,12 @@ def preprocess(self, feed=[], fetch=[]): for img in img_list: norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) feed_list.append(norm_img[np.newaxis, :]) - feed_batch = {"image": np.concatenate(feed_list, axis=0)} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed_batch = {"x": np.concatenate(feed_list, axis=0)} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed_batch, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py index f84463238..cb096ac1a 100644 --- a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py @@ -38,7 +38,7 @@ def init_rec(self): def preprocess(self, feed=[], fetch=[]): img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -53,12 +53,12 @@ def preprocess(self, feed=[], fetch=[]): for i, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_client.py b/examples/C++/PaddleOCR/ocr/rec_web_client.py index 312a21488..e78145b60 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_client.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_client.py @@ -36,6 +36,6 @@ def cv2_to_base64(image): image_data1 = file.read() image = cv2_to_base64(image_data1) #data = {"feed": [{"image": image}], "fetch": ["res"]} - data = {"feed": [{"image": image}] * 3, "fetch": ["res"]} + data = {"feed": [{"x": image}] * 3, "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_server.py b/examples/C++/PaddleOCR/ocr/rec_web_server.py index 2db6e398d..1a6e45812 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_server.py @@ -39,7 +39,7 @@ def preprocess(self, feed=[], fetch=[]): # TODO: to handle batch rec images img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -55,12 +55,12 @@ def preprocess(self, feed=[], fetch=[]): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h index bf8c98ede..fb499dbd3 100644 --- a/paddle_inference/paddle/include/paddle_engine.h +++ b/paddle_inference/paddle/include/paddle_engine.h @@ -225,6 +225,12 @@ class PaddleInferenceEngine : public EngineCore { config.SwitchIrOptim(true); } + int local_min_subgraph_size = min_subgraph_size; + if (engine_conf.has_min_subgraph_size()) { + local_min_subgraph_size = engine_conf.min_subgraph_size(); + LOG(INFO) << "local_min_subgraph_size=" << local_min_subgraph_size; + } + if (engine_conf.has_use_trt() && engine_conf.use_trt()) { config.SwitchIrOptim(true); if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) { @@ -236,10 +242,55 @@ class PaddleInferenceEngine : public EngineCore { } config.EnableTensorRtEngine(1 << 20, max_batch, - min_subgraph_size, + local_min_subgraph_size, precision_type, false, FLAGS_use_calib); + std::map> min_input_shape; + std::map> max_input_shape; + std::map> optim_input_shape; + if (engine_conf.min_input_shape_size() > 0) { + for (auto& iter : engine_conf.min_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + min_input_shape[key] = arr; + } + } + if (engine_conf.max_input_shape_size() > 0) { + for (auto& iter : engine_conf.max_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + max_input_shape[key] = arr; + } + } + if (engine_conf.opt_input_shape_size() > 0) { + for (auto& iter : engine_conf.opt_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + optim_input_shape[key] = arr; + } + } + config.SetTRTDynamicShapeInfo(min_input_shape, + max_input_shape, + optim_input_shape); LOG(INFO) << "create TensorRT predictor"; } diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 09931dad8..3afc13bfd 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -222,6 +222,8 @@ def serve_args(): "--prometheus_port", type=int, default=19393, help="Port of the Prometheus") parser.add_argument( "--request_cache_size", type=int, default=0, help="Port of the Prometheus") + parser.add_argument( + "--min_subgraph_size", type=int, default="", nargs="+", help="gpu ids") return parser.parse_args() @@ -272,11 +274,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi read_op = op_maker.create('GeneralReaderOp') op_seq_maker.add_op(read_op) + is_ocr = False #如果dag_list_op不是空,那么证明通过--op 传入了自定义OP或自定义的DAG串联关系。 #此时,根据--op 传入的顺序去组DAG串联关系 if len(dag_list_op) > 0: for single_op in dag_list_op: op_seq_maker.add_op(op_maker.create(single_op)) + if single_op == "GeneralDetectionOp": + is_ocr = True #否则,仍然按照原有方式根虎--model去串联。 else: for idx, single_model in enumerate(model): @@ -287,6 +292,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi # 以后可能考虑不用python脚本来生成配置 if len(model) == 2 and idx == 0 and single_model == "ocr_det_model": infer_op_name = "GeneralDetectionOp" + is_ocr = True else: infer_op_name = "GeneralInferOp" general_infer_op = op_maker.create(infer_op_name) @@ -306,10 +312,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi server.set_enable_prometheus(args.enable_prometheus) server.set_prometheus_port(args.prometheus_port) server.set_request_cache_size(args.request_cache_size) + server.set_min_subgraph_size(args.min_subgraph_size) if args.use_trt and device == "gpu": server.set_trt() server.set_ir_optimize(True) + if is_ocr: + info = set_ocr_dynamic_shape_info() + server.set_trt_dynamic_shape_info(info) if args.gpu_multi_stream and device == "gpu": server.set_gpu_multi_stream() @@ -344,6 +354,51 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi use_encryption_model=args.use_encryption_model) server.run_server() +def set_ocr_dynamic_shape_info(): + info = [] + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_182.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20] + } + max_input_shape = { + "x": [1, 3, 1536, 1536], + "conv2d_182.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960], + } + opt_input_shape = { + "x": [1, 3, 960, 960], + "conv2d_182.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240], + } + det_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(det_info) + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} + max_input_shape = { + "x": [50, 3, 32, 1000], + "lstm_1.tmp_0": [500, 50, 128] + } + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} + rec_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(rec_info) + return info def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index a1ed1c1c5..436d5d906 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -101,6 +101,8 @@ def __init__(self): self.enable_prometheus = False self.prometheus_port = 19393 self.request_cache_size = 0 + self.min_subgraph_size = [] + self.trt_dynamic_shape_info = [] def get_fetch_list(self, infer_node_idx=-1): fetch_names = [ @@ -211,6 +213,13 @@ def set_prometheus_port(self, prometheus_port): def set_request_cache_size(self, request_cache_size): self.request_cache_size = request_cache_size + def set_min_subgraph_size(self, min_subgraph_size): + if isinstance(min_subgraph_size, list): + self.min_subgraph_size = list(map(int, min_subgraph_size)) + + def set_trt_dynamic_shape_info(self, info): + self.trt_dynamic_shape_info = info + def _prepare_engine(self, model_config_paths, device, use_encryption_model): self.device = device if self.model_toolkit_conf == None: @@ -292,6 +301,25 @@ def _prepare_engine(self, model_config_paths, device, use_encryption_model): if use_encryption_model: engine.encrypted_model = True engine.type = "PADDLE_INFER" + if len(self.min_subgraph_size) > index: + engine.min_subgraph_size = self.min_subgraph_size[index] + if len(self.trt_dynamic_shape_info) > index: + dynamic_shape_info = self.trt_dynamic_shape_info[index] + try: + for key,value in dynamic_shape_info.items(): + shape_type = key + if shape_type == "min_input_shape": + local_map = engine.min_input_shape + if shape_type == "max_input_shape": + local_map = engine.max_input_shape + if shape_type == "opt_input_shape": + local_map = engine.opt_input_shape + for name,shape in value.items(): + local_value = ' '.join(str(i) for i in shape) + local_map[name] = local_value + except: + raise ValueError("Set TRT dynamic shape info error!") + self.model_toolkit_conf.append(server_sdk.ModelToolkitConf()) self.model_toolkit_conf[-1].engines.extend([engine]) index = index + 1 From 1e41fa426e2f1427d81796e04d7817b87a02dd0b Mon Sep 17 00:00:00 2001 From: ShiningZhang Date: Fri, 22 Apr 2022 19:57:21 +0800 Subject: [PATCH 2/3] fix comment&log --- paddle_inference/paddle/include/paddle_engine.h | 1 - python/paddle_serving_server/serve.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h index fb499dbd3..fff4f1c5e 100644 --- a/paddle_inference/paddle/include/paddle_engine.h +++ b/paddle_inference/paddle/include/paddle_engine.h @@ -228,7 +228,6 @@ class PaddleInferenceEngine : public EngineCore { int local_min_subgraph_size = min_subgraph_size; if (engine_conf.has_min_subgraph_size()) { local_min_subgraph_size = engine_conf.min_subgraph_size(); - LOG(INFO) << "local_min_subgraph_size=" << local_min_subgraph_size; } if (engine_conf.has_use_trt() && engine_conf.use_trt()) { diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 3afc13bfd..0dbee2192 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -223,7 +223,7 @@ def serve_args(): parser.add_argument( "--request_cache_size", type=int, default=0, help="Port of the Prometheus") parser.add_argument( - "--min_subgraph_size", type=int, default="", nargs="+", help="gpu ids") + "--min_subgraph_size", type=int, default="", nargs="+", help="min_subgraph_size") return parser.parse_args() From 55d8936326ac730325a557030db908efa451a6aa Mon Sep 17 00:00:00 2001 From: ShiningZhang Date: Fri, 22 Apr 2022 20:49:33 +0800 Subject: [PATCH 3/3] fix --min_subgraph_size --- python/paddle_serving_server/serve.py | 2 +- python/paddle_serving_server/server.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 0dbee2192..c628ff4ee 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -223,7 +223,7 @@ def serve_args(): parser.add_argument( "--request_cache_size", type=int, default=0, help="Port of the Prometheus") parser.add_argument( - "--min_subgraph_size", type=int, default="", nargs="+", help="min_subgraph_size") + "--min_subgraph_size", type=str, default="", nargs="+", help="min_subgraph_size") return parser.parse_args() diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index 436d5d906..da20c9fe6 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -214,8 +214,12 @@ def set_request_cache_size(self, request_cache_size): self.request_cache_size = request_cache_size def set_min_subgraph_size(self, min_subgraph_size): - if isinstance(min_subgraph_size, list): - self.min_subgraph_size = list(map(int, min_subgraph_size)) + for s in min_subgraph_size: + try: + size = int(s) + except: + size = 3 + self.min_subgraph_size.append(size) def set_trt_dynamic_shape_info(self, info): self.trt_dynamic_shape_info = info