Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 30 additions & 35 deletions script/test-grpc-interop
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,21 @@
#

# This script runs a grpc interop long-running test.
# It requires bazel build //test/grpc:all
# It requires the following targets to be built with bazel:
# //test/grpc:all
# @org_golang_google_grpc//stress/metrics_client
# @org_golang_google_grpc//interop/server
# @org_golang_google_grpc//stress/client
#set -x

# Resolve ROOT as the parent of the directory that contains this script, then
# source the helper library from ${ROOT}/script/all-utilities. If it cannot be
# sourced, print a message and exit with status 1.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
. ${ROOT}/script/all-utilities \
|| { echo "Cannot load Bash utilities" ; exit 1 ; }

HOST=''
DURATION_IN_HOUR=0
RUN_LENGTH=120
TEST_CASES='empty_unary:10,large_unary:10,'
TEST_CASES+='empty_stream:10,client_streaming:10,half_duplex:10,ping_pong:20,server_streaming:10,'
TEST_CASES+='empty_stream:10,client_streaming:10,ping_pong:20,server_streaming:10,'
TEST_CASES+='status_code_and_message:10,custom_metadata:10'

while getopts :h:l:t: arg; do
Expand All @@ -52,16 +56,6 @@ done

[[ -n "${HOST}" ]] || error_exit 'Please specify a host with -h option.'

#######################################
# Every 10 seconds, prints the elapsed time since the function started
# followed by the output of `metrics_client --total_only`.
# Globals:   ROOT (read) - directory that contains bazel-bin.
# Arguments: none
# Outputs:   "QPS report at N seconds:" plus the metrics_client output.
# Returns:   never returns on its own; loops until the process is killed.
#######################################
function print_test_metrics() {
  # Declare separately from the assignments so `local` does not mask the
  # exit status of the command substitutions.
  local start_time
  local curr_time
  start_time=$(date +"%s")
  while true; do
    sleep 10
    curr_time=$(date +"%s")
    echo -n "QPS report at $((curr_time - start_time)) seconds:"
    # Quote the path so a ROOT containing whitespace still resolves.
    "${ROOT}/bazel-bin/external/org_golang_google_grpc/stress/metrics_client/metrics_client" --total_only
  done
}

# Waits for the proxy and backend to start.
HOST_IP=${HOST%:*}
HOST_PORT=${HOST#*:}
Expand All @@ -70,49 +64,50 @@ retry $ROOT/bazel-bin/test/grpc/interop-client --server_port "${HOST_PORT}" \
--server_host "${HOST_IP}" \
|| error_exit 'Failed to send one request, the proxy did not start properly.'

# Convert the requested duration from hours to seconds, then enforce a floor:
# any value that is not greater than 120 seconds is replaced by 120.
DURATION_IN_SEC=$((DURATION_IN_HOUR * 60 * 60))
[[ ${DURATION_IN_SEC} -gt 120 ]] || DURATION_IN_SEC=120

echo "Starts interop stress test at $(date)."
echo "Test during is: $((DURATION_IN_SECONDS / 60)) minutes."
echo "Test during is: $((DURATION_IN_SEC / 60)) minutes."
echo "Test cases are: ${TEST_CASES}"

# Start a background print job.
print_test_metrics&
PRINT_JOB=$!
trap "kill ${PRINT_JOB}" EXIT
# Start a background test client job.
# The stress client targets ${HOST} with the configured test cases; its stderr
# is discarded. The job's PID is recorded so the EXIT trap can kill it when
# this script exits.
$ROOT/bazel-bin/external/org_golang_google_grpc/stress/client/client \
--server_addresses "${HOST}" \
--num_channels_per_server 200 \
--num_stubs_per_channel 1 \
--test_cases "${TEST_CASES}" 2> /dev/null&
TEST_JOB=$!
trap "kill ${TEST_JOB}" EXIT

START_TIME=$(date +"%s")
END_TIME=$((START_TIME + DURATION_IN_HOUR * 60 * 60))
END_TIME=$((START_TIME + DURATION_IN_SEC))
RUN_COUNT=0
SUCCESS_TIME=0
FAIL_COUNT=0

detect_memory_leak_init "http://${HOST}"

while true; do
CURR_TIME=$(date +"%s")
echo "Test time: $((CURR_TIME - START_TIME)) seconds, success time: ${SUCCESS_TIME} seconds."
((RUN_COUNT++))

timeout $((RUN_LENGTH + 10)) $ROOT/bazel-bin/external/org_golang_google_grpc/stress/client/client \
--server_addresses "${HOST}" \
--test_duration_secs "${RUN_LENGTH}" \
--num_channels_per_server 200 \
--num_stubs_per_channel 1 \
--test_cases "${TEST_CASES}" \
&& ((SUCCESS_TIME += RUN_LENGTH))

detect_memory_leak_check ${RUN_COUNT}
sleep 10
METRIC_RESULT=$("$ROOT/bazel-bin/external/org_golang_google_grpc/stress/metrics_client/metrics_client" \
--total_only --metrics_server_address=localhost:8081 2>&1)
QPS=$(echo ${METRIC_RESULT}|awk '{print $NF}')
echo "Metric report at $((CURR_TIME - START_TIME)) seconds: ${QPS} qps"
# Count a QPS above 100 as success; anything else counts as a failure.
[[ ${QPS} -gt 100 ]] || ((FAIL_COUNT++))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that at start time the QPS is less than 100, which is counted as a failure: https://endpoints-jenkins.appspot-preview.com/job/esp/job/postsubmits-master/89/execution/node/466/log/

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should allow at least 1 fail_count. Could you help fix it?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added #105 .


# Break if test has run long enough.
[[ $(date +"%s") -lt ${END_TIME} ]] || break
done

END_TIME=$(date +"%s")
echo "Total test time: $((END_TIME - START_TIME)) seconds, success time: ${SUCCESS_TIME} seconds at $(date)."
echo "Total test count: ${RUN_COUNT}, failed count: ${FAIL_COUNT}."

# Failure time is the part of the total elapsed time not counted as success.
TOTAL_TIME=$((END_TIME - START_TIME))
FAILURE_TIME=$((TOTAL_TIME - SUCCESS_TIME))
# If failure time is more than 5% of total test time, mark failed.
# The same 5% limit (integer division by 20) is applied to the number of
# failed checks relative to the number of runs.
RESULT=0
[[ ${FAILURE_TIME} -gt $((TOTAL_TIME / 20)) ]] && RESULT=1
[[ ${FAIL_COUNT} -gt $((RUN_COUNT / 20)) ]] && RESULT=1

# We fail the test if memory increase is large.
detect_memory_leak_final && MEMORY_LEAK=0 || MEMORY_LEAK=1
Expand Down