Benchmark merge #1175

Merged: 24 commits, Apr 26, 2021
2 changes: 2 additions & 0 deletions doc/PIPELINE_SERVING_CN.md
@@ -149,6 +149,8 @@ def __init__(name=None,





### 2. Secondary development interface for general OPs
The goal of secondary OP development is to let business developers control the OP processing strategy.
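For illustration, a minimal sketch of such an override, assuming the pipeline `Op` base class exposes `init_op`/`preprocess`/`postprocess` hooks; the import path, signatures, and field names here are version-dependent assumptions, not the project's exact API:

``` python
from paddle_serving_server.pipeline import Op  # import path may differ by Serving version


class ExampleOp(Op):
    def init_op(self):
        # One-time setup when the OP starts, e.g. loading resources.
        self.lowercase = True

    def preprocess(self, input_dicts):
        # Control how upstream input becomes the model feed.
        (_, input_dict), = input_dicts.items()
        if self.lowercase:
            input_dict["words"] = input_dict["words"].lower()
        return input_dict

    def postprocess(self, input_dicts, fetch_dict):
        # Control how model output is shaped for downstream OPs.
        return fetch_dict
```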

6 changes: 6 additions & 0 deletions python/examples/bert/README.md
@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark output log file is named `profile_log_bert_seq128_model`.
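
To summarize the per-stage timings in that log, the `show_profile.py` helper from `python/examples/util` (updated later in this PR) can be run on it; a usage sketch, assuming the benchmark ran with 4 client threads and the command is issued from this example's directory:

``` shell
python ../util/show_profile.py profile_log_bert_seq128_model 4
```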
7 changes: 7 additions & 0 deletions python/examples/bert/README_CN.md
@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 # start the GPU inference service
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark log file is `profile_log_bert_seq128_model`.
To change the benchmark parameters, edit the configuration in `benchmark.sh`, as in the sketch below.
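
For example, to benchmark on a single GPU, the device settings in `benchmark.sh` (edited in the diff below) could be narrowed; a sketch based on the lines this PR touches:

``` shell
export CUDA_VISIBLE_DEVICES=0
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0 --mem_optim --ir_optim > elog 2>&1 &
```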
11 changes: 7 additions & 4 deletions python/examples/bert/benchmark.py
100644 → 100755
@@ -21,6 +21,7 @@
import time
import json
import requests
import numpy as np
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
@@ -56,7 +57,11 @@ def single_func(idx, resource):
feed_batch = []
b_start = time.time()
for bi in range(args.batch_size):
    feed_batch.append(reader.process(dataset[bi]))
    feed_dict = reader.process(dataset[bi])
    for key in feed_dict.keys():
        feed_dict[key] = np.array(feed_dict[key]).reshape(
            (1, 128, 1))
    feed_batch.append(feed_dict)
b_end = time.time()

if profile_flags:
@@ -116,9 +121,7 @@ def single_func(idx, resource):

if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = [
        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
    ]
    endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
    turns = 100
    start = time.time()
    result = multi_thread_runner.run(
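The reshape added in the first hunk above turns every feed field into an explicit `(1, 128, 1)` tensor before it is appended to the batch. A standalone sketch of that transformation (the key names are illustrative, not the model's real input names):

``` python
import numpy as np

# Hypothetical feed dict as emitted by the reader for one sample.
feed_dict = {"input_ids": [0] * 128, "position_ids": [0] * 128}
for key in feed_dict.keys():
    # Each field becomes a (batch=1, seq_len=128, 1) ndarray.
    feed_dict[key] = np.array(feed_dict[key]).reshape((1, 128, 1))

assert feed_dict["input_ids"].shape == (1, 128, 1)
```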
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark.sh
100644 → 100755
@@ -1,5 +1,5 @@
rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0,1
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
sleep 5

#warm up
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark_with_profile.sh
100644 → 100755
@@ -1,5 +1,5 @@
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export CUDA_VISIBLE_DEVICES=0,1
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
6 changes: 6 additions & 0 deletions python/examples/fit_a_line/README.md
@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is named `profile_log_uci_housing_model`.
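
For reference, `benchmark.sh` drives `benchmark.py`, which can also be invoked directly; a sketch, assuming the standard `benchmark_args` flags (`--thread`, `--batch_size`, `--model`, `--endpoint`) and the serving port used in this example:

``` shell
python benchmark.py --thread 4 --batch_size 1 --model uci_housing_client/serving_client_conf.prototxt --endpoint 127.0.0.1:9393
```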
7 changes: 7 additions & 0 deletions python/examples/fit_a_line/README_CN.md
@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is `profile_log_uci_housing_model`.
To change the benchmark parameters, edit the configuration in `benchmark.sh`; the log can then be summarized as shown below.
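
A sketch of summarizing the log with the `show_profile.py` helper updated later in this PR, assuming 4 client threads:

``` shell
python ../util/show_profile.py profile_log_uci_housing_model 4
```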
18 changes: 13 additions & 5 deletions python/examples/fit_a_line/benchmark.py
@@ -15,7 +15,7 @@

from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
from paddle_serving_client.utils import benchmark_args, show_latency
import time
import paddle
import sys
@@ -37,9 +37,6 @@ def single_func(idx, resource):
client.connect([args.endpoint])
start = time.time()
for data in train_reader():
    #new_data = np.zeros((1, 13)).astype("float32")
    #new_data[0] = data[0][0]
    #fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
    fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start], [total_number]]
@@ -57,6 +54,17 @@ def single_func(idx, resource):
return [[end - start], [total_number]]


start = time.time()
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
    avg_cost += result[0][i]
avg_cost = avg_cost / args.thread

print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
show_latency(result[1])
4 changes: 4 additions & 0 deletions python/examples/util/show_profile.py
100644 → 100755
@@ -5,6 +5,7 @@
profile_file = sys.argv[1]
thread_num = sys.argv[2]
time_dict = collections.OrderedDict()
query_count = 0


def parse(line):
@@ -26,12 +27,15 @@ def parse(line):


with open(profile_file) as f:
    query_count = 0
    for line in f.readlines():
        line = line.strip().split("\t")
        if line[0] == "PROFILE":
            parse(line[2])
            query_count += 1

print("thread_num: {}".format(thread_num))
print("query_count: {}".format(query_count))
for name in time_dict:
    print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
        1000000.0 * float(thread_num))))