Merge pull request #1175 from HexToString/benchmark_merge
Benchmark merge
TeslaZhao authored Apr 26, 2021
2 parents 8a5303f + aae7daf commit 08b5ffb
Showing 10 changed files with 56 additions and 13 deletions.
2 changes: 2 additions & 0 deletions doc/PIPELINE_SERVING_CN.md
@@ -149,6 +149,8 @@ def __init__(name=None,





### 2. Secondary development interface for general OPs
The purpose of secondary OP development is to let business developers control the OP's processing strategy.
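
For illustration, here is a minimal sketch of such an override. The `Op` import path and the `preprocess`/`postprocess` signatures are assumptions (they have changed across Serving releases), so check the interface of your installed version:

```python
# Hypothetical custom OP; the import path and method signatures
# below follow an older Serving release and may differ in yours.
from paddle_serving_server.pipeline import Op


class ExampleOp(Op):
    def init_op(self):
        # One-time setup (e.g. building a tokenizer) before serving starts.
        pass

    def preprocess(self, input_dicts):
        # Merge the outputs of all upstream OPs into one feed dict;
        # here we simply take the single predecessor's output.
        (_, input_dict), = input_dicts.items()
        return input_dict

    def postprocess(self, input_dicts, fetch_dict):
        # Re-key or reshape the model outputs before they are
        # passed to downstream OPs.
        return fetch_dict
```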

6 changes: 6 additions & 0 deletions python/examples/bert/README.md
@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark's output log file is named `profile_log_bert_seq128_model`.
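
To break the log down per stage, the `show_profile.py` helper under `python/examples/util` (also updated in this commit) can be pointed at it. An illustrative invocation, assuming the benchmark ran with 4 client threads:

``` shell
python ../util/show_profile.py profile_log_bert_seq128_model 4
```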
7 changes: 7 additions & 0 deletions python/examples/bert/README_CN.md
@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 # start the GPU inference service
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark log file is named `profile_log_bert_seq128_model`.
To change the parameters of a benchmark run, edit the configuration in `benchmark.sh`.
11 changes: 7 additions & 4 deletions python/examples/bert/benchmark.py
100644 → 100755
@@ -21,6 +21,7 @@
import time
import json
import requests
import numpy as np
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
@@ -56,7 +57,11 @@ def single_func(idx, resource):
feed_batch = []
b_start = time.time()
for bi in range(args.batch_size):
feed_batch.append(reader.process(dataset[bi]))
feed_dict = reader.process(dataset[bi])
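                # reshape each input field to an ndarray of shape
                # (1, 128, 1): batch 1, sequence length 128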
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(1, 128, 1))
feed_batch.append(feed_dict)
b_end = time.time()

if profile_flags:
@@ -116,9 +121,7 @@ def single_func(idx, resource):

if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
endpoint_list = [
"127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
]
endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
turns = 100
start = time.time()
result = multi_thread_runner.run(
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark.sh
100644 → 100755
@@ -1,5 +1,5 @@
rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0,1
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
sleep 5

#warm up
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark_with_profile.sh
100644 → 100755
@@ -1,5 +1,5 @@
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export CUDA_VISIBLE_DEVICES=0,1
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
6 changes: 6 additions & 0 deletions python/examples/fit_a_line/README.md
@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark's log file is named `profile_log_uci_housing_model`.
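
As in the bert example, per-stage timings can be extracted from the log with `python/examples/util/show_profile.py` (illustrative; the second argument should match the thread count used in the run):

``` shell
python ../util/show_profile.py profile_log_uci_housing_model 4
```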
7 changes: 7 additions & 0 deletions python/examples/fit_a_line/README_CN.md
@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is named `profile_log_uci_housing_model`.
To change the parameters of a benchmark run, edit the configuration in `benchmark.sh`.
18 changes: 13 additions & 5 deletions python/examples/fit_a_line/benchmark.py
@@ -15,7 +15,7 @@

from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
from paddle_serving_client.utils import benchmark_args, show_latency
import time
import paddle
import sys
@@ -37,9 +37,6 @@ def single_func(idx, resource):
client.connect([args.endpoint])
start = time.time()
for data in train_reader():
#new_data = np.zeros((1, 13)).astype("float32")
#new_data[0] = data[0][0]
#fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start], [total_number]]
@@ -57,6 +54,17 @@ def single_func(idx, resource):
return [[end - start], [total_number]]


start = time.time()
multi_thread_runner = MultiThreadRunner()
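# run single_func in args.thread client threads; result[0] gathers each
# thread's elapsed time and result[1] its sample count (see single_func)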
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread

print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
show_latency(result[1])
4 changes: 4 additions & 0 deletions python/examples/util/show_profile.py
100644 → 100755
@@ -5,6 +5,7 @@
profile_file = sys.argv[1]
thread_num = sys.argv[2]
time_dict = collections.OrderedDict()
query_count = 0


def prase(line):
@@ -26,12 +27,15 @@ def prase(line):


with open(profile_file) as f:
query_count = 0
for line in f.readlines():
line = line.strip().split("\t")
if line[0] == "PROFILE":
prase(line[2])
query_count += 1

print("thread_num: {}".format(thread_num))
print("query_count: {}".format(query_count))
for name in time_dict:
print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
1000000.0 * float(thread_num))))
