Benchmark merge #1175

Merged: 24 commits, Apr 26, 2021
2 changes: 2 additions & 0 deletions doc/PIPELINE_SERVING_CN.md
@@ -149,6 +149,8 @@ def __init__(name=None,





### 2. Secondary development interface for general OPs
The goal of secondary OP development is to let business developers control the OP processing strategy.
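For illustration, a minimal sketch of such an override, assuming the pipeline `Op` base class exposes `init_op`/`preprocess`/`postprocess` hooks; the import path, signatures, and field names here are version-dependent assumptions, not the project's exact API:

``` python
from paddle_serving_server.pipeline import Op  # import path may differ by Serving version


class ExampleOp(Op):
    def init_op(self):
        # One-time setup when the OP starts, e.g. loading resources.
        self.lowercase = True

    def preprocess(self, input_dicts):
        # Control how upstream input becomes the model feed.
        (_, input_dict), = input_dicts.items()
        if self.lowercase:
            input_dict["words"] = input_dict["words"].lower()
        return input_dict

    def postprocess(self, input_dicts, fetch_dict):
        # Control how model output is shaped for downstream OPs.
        return fetch_dict
```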

6 changes: 6 additions & 0 deletions python/examples/bert/README.md
@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark output log file is named `profile_log_bert_seq128_model`.
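
To summarize the per-stage timings in that log, the `show_profile.py` helper from `python/examples/util` (updated later in this PR) can be run on it; a usage sketch, assuming the benchmark ran with 4 client threads and the command is issued from this example's directory:

``` shell
python ../util/show_profile.py profile_log_bert_seq128_model 4
```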
7 changes: 7 additions & 0 deletions python/examples/bert/README_CN.md
@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 # start the GPU inference service
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```

## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark log file is `profile_log_bert_seq128_model`.
To change the benchmark parameters, edit the configuration in `benchmark.sh`, as in the sketch below.
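
For example, to benchmark on a single GPU, the device settings in `benchmark.sh` (edited in the diff below) could be narrowed; a sketch based on the lines this PR touches:

``` shell
export CUDA_VISIBLE_DEVICES=0
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0 --mem_optim --ir_optim > elog 2>&1 &
```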
11 changes: 7 additions & 4 deletions python/examples/bert/benchmark.py
100644 → 100755
@@ -21,6 +21,7 @@
import time
import json
import requests
import numpy as np
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
@@ -56,7 +57,11 @@ def single_func(idx, resource):
feed_batch = []
b_start = time.time()
for bi in range(args.batch_size):
    feed_batch.append(reader.process(dataset[bi]))
    feed_dict = reader.process(dataset[bi])
    for key in feed_dict.keys():
        feed_dict[key] = np.array(feed_dict[key]).reshape(
            (1, 128, 1))
    feed_batch.append(feed_dict)
b_end = time.time()

if profile_flags:
@@ -116,9 +121,7 @@ def single_func(idx, resource):

if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = [
        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
    ]
    endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
    turns = 100
    start = time.time()
    result = multi_thread_runner.run(
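The reshape added in the first hunk above turns every feed field into an explicit `(1, 128, 1)` tensor before it is appended to the batch. A standalone sketch of that transformation (the key names are illustrative, not the model's real input names):

``` python
import numpy as np

# Hypothetical feed dict as emitted by the reader for one sample.
feed_dict = {"input_ids": [0] * 128, "position_ids": [0] * 128}
for key in feed_dict.keys():
    # Each field becomes a (batch=1, seq_len=128, 1) ndarray.
    feed_dict[key] = np.array(feed_dict[key]).reshape((1, 128, 1))

assert feed_dict["input_ids"].shape == (1, 128, 1)
```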
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark.sh
100644 → 100755
@@ -1,5 +1,5 @@
rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0,1
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
sleep 5

#warm up
4 changes: 2 additions & 2 deletions python/examples/bert/benchmark_with_profile.sh
100644 → 100755
@@ -1,5 +1,5 @@
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export CUDA_VISIBLE_DEVICES=0,1
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
6 changes: 6 additions & 0 deletions python/examples/fit_a_line/README.md
@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is named `profile_log_uci_housing_model`.
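
For reference, `benchmark.sh` drives `benchmark.py`, which can also be invoked directly; a sketch, assuming the standard `benchmark_args` flags (`--thread`, `--batch_size`, `--model`, `--endpoint`) and the serving port used in this example:

``` shell
python benchmark.py --thread 4 --batch_size 1 --model uci_housing_client/serving_client_conf.prototxt --endpoint 127.0.0.1:9393
```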
7 changes: 7 additions & 0 deletions python/examples/fit_a_line/README_CN.md
@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```

## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is `profile_log_uci_housing_model`.
To change the benchmark parameters, edit the configuration in `benchmark.sh`; the log can then be summarized as shown below.
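
A sketch of summarizing the log with the `show_profile.py` helper updated later in this PR, assuming 4 client threads:

``` shell
python ../util/show_profile.py profile_log_uci_housing_model 4
```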
18 changes: 13 additions & 5 deletions python/examples/fit_a_line/benchmark.py
@@ -15,7 +15,7 @@

from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
from paddle_serving_client.utils import benchmark_args, show_latency
import time
import paddle
import sys
@@ -37,9 +37,6 @@ def single_func(idx, resource):
client.connect([args.endpoint])
start = time.time()
for data in train_reader():
    #new_data = np.zeros((1, 13)).astype("float32")
    #new_data[0] = data[0][0]
    #fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
    fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start], [total_number]]
@@ -57,6 +54,17 @@ def single_func(idx, resource):
return [[end - start], [total_number]]


start = time.time()
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
    avg_cost += result[0][i]
avg_cost = avg_cost / args.thread

print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
show_latency(result[1])
4 changes: 4 additions & 0 deletions python/examples/util/show_profile.py
100644 → 100755
@@ -5,6 +5,7 @@
profile_file = sys.argv[1]
thread_num = sys.argv[2]
time_dict = collections.OrderedDict()
query_count = 0


def parse(line):
@@ -26,12 +27,15 @@ def parse(line):


with open(profile_file) as f:
    query_count = 0
    for line in f.readlines():
        line = line.strip().split("\t")
        if line[0] == "PROFILE":
            parse(line[2])
            query_count += 1

print("thread_num: {}".format(thread_num))
print("query_count: {}".format(query_count))
for name in time_dict:
    print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
        1000000.0 * float(thread_num))))