Skip to content

Commit 425c14d

Browse files
authored
[CI] Add op-benchmark (#72991)
* test=document_fix * test=document_fix * test=document_fix * test=document_fix
1 parent 4f9d17a commit 425c14d

File tree

2 files changed

+140
-51
lines changed

2 files changed

+140
-51
lines changed

tools/ci_op_benchmark.sh renamed to tools/op_benchmark.sh

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17-
set +ex
17+
set +e
18+
set -x
1819

1920
[ -z "$PADDLE_ROOT" ] && PADDLE_ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd)
2021

@@ -94,35 +95,21 @@ function load_CHANGE_OP_FILES_by_header_file {
9495
# Load every GPU op/kernel source file that the benchmark should cover.
# Scans paddle/fluid/operators and paddle/phi/kernels (relative to the current
# directory) for *_op.cu, *_kernel.cu and *_kernel_gpudnn.cu files and appends
# each one accepted by match_cu_file_directory to CHANGE_OP_FILES.
# Globals:
#   CHANGE_OP_FILES (appended to)
function load_CHANGE_OP_FILES {
  LOG "[INFO] run function load_CHANGE_OP_FILES"
  # Keep loop state local so callers' variables named file/dir/pattern survive.
  local dir pattern file
  local -a file_patterns=("_op.cu" "_kernel.cu" "_kernel_gpudnn.cu")
  local -a directories=("paddle/fluid/operators" "paddle/phi/kernels")
  for dir in "${directories[@]}"; do
    if [ -d "$dir" ]; then
      for pattern in "${file_patterns[@]}"; do
        while IFS= read -r file; do
          # Quoted so a path containing whitespace stays a single argument.
          match_cu_file_directory "$file" || continue
          LOG "[INFO] Found \"${file}\"."
          CHANGE_OP_FILES+=("$file")
        done < <(find "$dir" -type f -name "*$pattern" 2>/dev/null)
      done
    else
      echo "Directory $dir does not exist."
    fi
  done
}
127114

128115
# Clone benchmark repo
@@ -233,7 +220,7 @@ function check_op_benchmark_result {
233220
local logs_dir api_info_file check_status_code
234221
# default 3 times
235222
[ -z "${RETRY_TIMES}" ] && RETRY_TIMES=3
236-
logs_dir=$(pwd)/logs-test_pr
223+
logs_dir=$(pwd)/logs-pr_whl
237224
api_info_file=$(pwd)/api_info.txt
238225
for retry_time in $(seq 0 ${RETRY_TIMES})
239226
do
@@ -255,7 +242,7 @@ function check_op_benchmark_result {
255242
# check current result and update the file to benchmark test
256243
python ${PADDLE_ROOT}/tools/check_op_benchmark_result.py \
257244
--develop_logs_dir $(pwd)/logs-dev_whl \
258-
--pr_logs_dir $(pwd)/logs-test_pr \
245+
--pr_logs_dir $(pwd)/logs-pr_whl \
259246
--api_info_file ${api_info_file}
260247
check_status_code=$?
261248
# TODO(Avin0323): retry only if the performance check fails
@@ -315,24 +302,5 @@ function gpu_op_benchmark {
315302
exit 0
316303
}
317304

318-
319-
# The PR will pass quickly when get approval from specific person.
320-
set +x
321-
approval_line=$(curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000)
322-
if [ -n "${approval_line}" ]; then
323-
APPROVALS=$(echo ${approval_line} | python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 Xreki zhangting2020)
324-
LOG "[INFO] current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
325-
if [ "${APPROVALS}" == "TRUE" ]; then
326-
LOG "[INFO] ==================================="
327-
LOG "[INFO] current pr ${GIT_PR_ID} has got approvals. So, Pass CI directly!"
328-
LOG "[INFO] ==================================="
329-
exit 0
330-
fi
331-
fi
332-
333-
case $1 in
334-
run_op_benchmark)
335-
prepare_env
336-
gpu_op_benchmark
337-
;;
338-
esac
305+
prepare_env
306+
gpu_op_benchmark

tools/op_benchmark_count.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import argparse
16+
import re
17+
from collections import defaultdict
18+
19+
# Bucket labels shared by both tallies, listed in the order they are reported.
_CATEGORY_NAMES = (
    "within_1%",
    "increase_1_to_5%",
    "increase_above_5_to_10%",
    "increase_above_10%",
    "decrease_1_to_5%",
    "decrease_above_5%",
)

# Number of log entries that fell into each bucket, per metric.
gpu_time_categories = dict.fromkeys(_CATEGORY_NAMES, 0)
total_time_categories = dict.fromkeys(_CATEGORY_NAMES, 0)

gpu_time_pattern = re.compile(r"GPU time change: ([\d.-]*)")
total_time_pattern = re.compile(r"Total time change: ([\d.-]+)%")
error_pattern = re.compile(r'Check speed result with case "(.*?)"')


def categorize_change(change):
    """Return the bucket label for a percentage change.

    Args:
        change: percentage change as a float; positive values are increases.

    Returns:
        One of the keys of the category dicts, or None if ``change`` fits no
        bucket (only possible for NaN).
    """
    if -1 < change < 1:
        return "within_1%"
    if 1 <= change < 5:
        return "increase_1_to_5%"
    if 5 <= change < 10:
        return "increase_above_5_to_10%"
    if change >= 10:
        return "increase_above_10%"
    if -5 < change <= -1:
        return "decrease_1_to_5%"
    if change <= -5:
        return "decrease_above_5%"
    return None


def _tally(categories, text):
    """Parse ``text`` as a float and count it in ``categories``.

    The regexes accept any run of digits, dots and dashes, so captures such as
    "-" or "1.2.3" are possible; those are skipped instead of aborting the
    whole report with a ValueError.
    """
    try:
        change = float(text)
    except ValueError:
        return
    label = categorize_change(change)
    if label is not None:
        categories[label] += 1


def analyze_lines(lines):
    """Scan log lines and update the module-level category tallies.

    Args:
        lines: iterable of log lines (for example an open text file).

    Returns:
        A ``(gpu_time_lines, error_cases)`` tuple: the number of lines that
        mention "GPU time change", and a mapping from case name to the number
        of "Check speed result" lines seen for that case.
    """
    gpu_time_lines = 0
    error_cases = defaultdict(int)

    for line in lines:
        if "GPU time change" in line:
            gpu_time_lines += 1
            gpu_time_match = gpu_time_pattern.search(line)
            if gpu_time_match:
                # An empty capture is counted as "no change", as before.
                _tally(gpu_time_categories, gpu_time_match.group(1) or "0.0")
        elif "Total time change" in line:
            total_time_match = total_time_pattern.search(line)
            if total_time_match:
                _tally(total_time_categories, total_time_match.group(1))
        else:
            error_match = error_pattern.search(line)
            if error_match:
                error_cases[error_match.group(1)] += 1

    return gpu_time_lines, error_cases


def print_categories(categories, title):
    """Print each bucket's count and its share of all counted entries."""
    total = sum(categories.values())
    print(f"\n{title} Categories:")
    for category, count in categories.items():
        percentage = (count / total * 100) if total > 0 else 0
        print(f"{category}: {count} ({percentage:.2f}%)")


def main(argv=None):
    """Command-line entry point: analyze one log file and print the report.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Analyze time changes in log files"
    )
    parser.add_argument('file_name', type=str, help='The name of the log file')
    args = parser.parse_args(argv)

    with open(args.file_name, 'r') as file:
        gpu_time_lines, error_cases = analyze_lines(file)

    print_categories(gpu_time_categories, "GPU Time Change")
    print_categories(total_time_categories, "Total Time Change")

    total_errors = sum(error_cases.values())
    # The error share is reported relative to the number of GPU-time lines.
    error_percentage = (
        (total_errors / gpu_time_lines * 100) if gpu_time_lines > 0 else 0
    )
    unique_errors = len(error_cases)

    print(f"\nError Cases Total: {total_errors}")
    print(f"Error Lines Percentage: {error_percentage:.2f}%")
    print(f"Unique Error OP: {unique_errors}\n")

    for case, count in error_cases.items():
        print(f"OP '{case}': {count} occurrences")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)