Skip to content

Commit a8d829d

Browse files
authored
Add timeout support to ansible-test. (ansible#53302)
* Add timeout support to ansible-test. * Fix ansible-test tar filename filter bug. * Update timeouts used on Shippable. * Kill subprocesses when parent process terminates. * Require explicit use of env --show option.
1 parent 44b347a commit a8d829d

File tree

9 files changed

+219
-21
lines changed

9 files changed

+219
-21
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,4 @@ test/units/.coverage.*
105105
/test/integration/cloud-config-azure.yml
106106
/SYMLINK_CACHE.json
107107
changelogs/.plugin-cache.yaml
108+
.ansible-test-timeout.json

test/runner/lib/cli.py

+7
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from lib.env import (
5353
EnvConfig,
5454
command_env,
55+
configure_timeout,
5556
)
5657

5758
from lib.sanity import (
@@ -95,6 +96,7 @@ def main():
9596
display.color = config.color
9697
display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets)
9798
check_startup()
99+
configure_timeout(config)
98100

99101
display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2)
100102
display.info('MAXFD: %d' % MAXFD, verbosity=2)
@@ -509,6 +511,11 @@ def parse_args():
509511
action='store_true',
510512
help='dump environment to disk')
511513

514+
env.add_argument('--timeout',
515+
type=int,
516+
metavar='MINUTES',
517+
help='timeout for future ansible-test commands (0 clears)')
518+
512519
if argcomplete:
513520
argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True)
514521

test/runner/lib/constants.py

+3
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@
33
# Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True.
44
# This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time.
55
SOFT_RLIMIT_NOFILE = 1024
6+
7+
# File used to track the ansible-test test execution timeout.
8+
TIMEOUT_PATH = '.ansible-test-timeout.json'

test/runner/lib/env.py

+128-1
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,17 @@
44

55
import datetime
66
import json
7+
import functools
78
import os
89
import platform
910
import re
11+
import signal
1012
import sys
13+
import time
1114

1215
from lib.config import (
1316
CommonConfig,
17+
TestConfig,
1418
)
1519

1620
from lib.util import (
@@ -34,6 +38,18 @@
3438
docker_version
3539
)
3640

41+
from lib.thread import (
42+
WrappedThread,
43+
)
44+
45+
from lib.constants import (
46+
TIMEOUT_PATH,
47+
)
48+
49+
from lib.test import (
50+
TestTimeout,
51+
)
52+
3753

3854
class EnvConfig(CommonConfig):
3955
"""Configuration for the tools command."""
@@ -43,14 +59,26 @@ def __init__(self, args):
4359
"""
4460
super(EnvConfig, self).__init__(args, 'env')
4561

46-
self.show = args.show or not args.dump
62+
self.show = args.show
4763
self.dump = args.dump
64+
self.timeout = args.timeout
4865

4966

5067
def command_env(args):
5168
"""
5269
:type args: EnvConfig
5370
"""
71+
show_dump_env(args)
72+
set_timeout(args)
73+
74+
75+
def show_dump_env(args):
76+
"""
77+
:type args: EnvConfig
78+
"""
79+
if not args.show and not args.dump:
80+
return
81+
5482
data = dict(
5583
ansible=dict(
5684
version=get_ansible_version(args),
@@ -84,6 +112,105 @@ def command_env(args):
84112
results_fd.write(json.dumps(data, sort_keys=True))
85113

86114

115+
def set_timeout(args):
116+
"""
117+
:type args: EnvConfig
118+
"""
119+
if args.timeout is None:
120+
return
121+
122+
if args.timeout:
123+
deadline = (datetime.datetime.utcnow() + datetime.timedelta(minutes=args.timeout)).strftime('%Y-%m-%dT%H:%M:%SZ')
124+
125+
display.info('Setting a %d minute test timeout which will end at: %s' % (args.timeout, deadline), verbosity=1)
126+
else:
127+
deadline = None
128+
129+
display.info('Clearing existing test timeout.', verbosity=1)
130+
131+
if args.explain:
132+
return
133+
134+
if deadline:
135+
data = dict(
136+
duration=args.timeout,
137+
deadline=deadline,
138+
)
139+
140+
with open(TIMEOUT_PATH, 'w') as timeout_fd:
141+
json.dump(data, timeout_fd, indent=4, sort_keys=True)
142+
elif os.path.exists(TIMEOUT_PATH):
143+
os.remove(TIMEOUT_PATH)
144+
145+
146+
def get_timeout():
147+
"""
148+
:rtype: dict[str, any] | None
149+
"""
150+
if not os.path.exists(TIMEOUT_PATH):
151+
return None
152+
153+
with open(TIMEOUT_PATH, 'r') as timeout_fd:
154+
data = json.load(timeout_fd)
155+
156+
data['deadline'] = datetime.datetime.strptime(data['deadline'], '%Y-%m-%dT%H:%M:%SZ')
157+
158+
return data
159+
160+
161+
def configure_timeout(args):
162+
"""
163+
:type args: CommonConfig
164+
"""
165+
if isinstance(args, TestConfig):
166+
configure_test_timeout(args) # only tests are subject to the timeout
167+
168+
169+
def configure_test_timeout(args):
170+
"""
171+
:type args: TestConfig
172+
"""
173+
timeout = get_timeout()
174+
175+
if not timeout:
176+
return
177+
178+
timeout_start = datetime.datetime.utcnow()
179+
timeout_duration = timeout['duration']
180+
timeout_deadline = timeout['deadline']
181+
timeout_remaining = timeout_deadline - timeout_start
182+
183+
test_timeout = TestTimeout(timeout_duration)
184+
185+
if timeout_remaining <= datetime.timedelta():
186+
test_timeout.write(args)
187+
188+
raise ApplicationError('The %d minute test timeout expired %s ago at %s.' % (
189+
timeout_duration, timeout_remaining * -1, timeout_deadline))
190+
191+
display.info('The %d minute test timeout expires in %s at %s.' % (
192+
timeout_duration, timeout_remaining, timeout_deadline), verbosity=1)
193+
194+
def timeout_handler(_dummy1, _dummy2):
195+
"""Runs when SIGUSR1 is received."""
196+
test_timeout.write(args)
197+
198+
raise ApplicationError('Tests aborted after exceeding the %d minute time limit.' % timeout_duration)
199+
200+
def timeout_waiter(timeout_seconds):
201+
"""
202+
:type timeout_seconds: int
203+
"""
204+
time.sleep(timeout_seconds)
205+
os.kill(os.getpid(), signal.SIGUSR1)
206+
207+
signal.signal(signal.SIGUSR1, timeout_handler)
208+
209+
instance = WrappedThread(functools.partial(timeout_waiter, timeout_remaining.seconds))
210+
instance.daemon = True
211+
instance.start()
212+
213+
87214
def show_dict(data, verbose, root_verbosity=0, path=None):
88215
"""
89216
:type data: dict[str, any]

test/runner/lib/pytar.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
ABC,
1212
)
1313

14+
from lib.constants import (
15+
TIMEOUT_PATH,
16+
)
17+
1418
# improve performance by disabling uid/gid lookups
1519
tarfile.pwd = None
1620
tarfile.grp = None
@@ -45,6 +49,7 @@ def __init__(self):
4549
self.ignore_files = (
4650
'.gitignore',
4751
'.gitdir',
52+
TIMEOUT_PATH,
4853
)
4954

5055
self.ignore_extensions = (
@@ -58,7 +63,7 @@ def ignore(self, item):
5863
:rtype: tarfile.TarInfo | None
5964
"""
6065
filename = os.path.basename(item.path)
61-
name, ext = os.path.splitext(filename)
66+
ext = os.path.splitext(filename)[1]
6267
dirs = os.path.split(item.path)
6368

6469
if not item.isdir():
@@ -68,7 +73,7 @@ def ignore(self, item):
6873
if item.path.startswith('./docs/docsite/_build/'):
6974
return None
7075

71-
if name in self.ignore_files:
76+
if filename in self.ignore_files:
7277
return None
7378

7479
if ext in self.ignore_extensions:

test/runner/lib/test.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,17 @@ def calculate_confidence(path, line, metadata):
5757

5858
class TestResult(object):
5959
"""Base class for test results."""
60-
def __init__(self, command, test, python_version=None):
60+
def __init__(self, command, test, python_version=None, enable_junit=False):
6161
"""
6262
:type command: str
6363
:type test: str
6464
:type python_version: str
65+
:type enable_junit: bool
6566
"""
6667
self.command = command
6768
self.test = test
6869
self.python_version = python_version
70+
self.enable_junit = enable_junit
6971
self.name = self.test or self.command
7072

7173
if self.python_version:
@@ -88,7 +90,7 @@ def write(self, args):
8890
if args.lint:
8991
self.write_lint()
9092

91-
if args.junit:
93+
if args.junit or self.enable_junit:
9294
if self.junit:
9395
self.write_junit(args)
9496
else:
@@ -159,6 +161,38 @@ def save_junit(self, args, test_case, properties=None):
159161
xml.write(report.encode('utf-8', 'strict'))
160162

161163

164+
class TestTimeout(TestResult):
165+
"""Test timeout."""
166+
def __init__(self, timeout_duration):
167+
"""
168+
:type timeout_duration: int
169+
"""
170+
super(TestTimeout, self).__init__(command='timeout', test='', enable_junit=True)
171+
172+
self.timeout_duration = timeout_duration
173+
174+
def write_junit(self, args):
175+
"""
176+
:type args: TestConfig
177+
"""
178+
message = 'Tests were aborted after exceeding the %d minute time limit.' % self.timeout_duration
179+
output = '''One or more of the following situations may be responsible:
180+
181+
- Code changes have resulted in tests that hang or run for an excessive amount of time.
182+
- Tests have been added which exceed the time limit when combined with existing tests.
183+
- Test infrastructure and/or external dependencies are operating slower than normal.'''
184+
185+
if args.coverage:
186+
output += '\n- Additional overhead from collecting code coverage has resulted in tests exceeding the time limit.'
187+
188+
output += '\n\nConsult the console log for additional details on where the timeout occurred.'
189+
190+
test_case = self.junit.TestCase(classname=self.command, name=self.name)
191+
test_case.add_error_info(message, output)
192+
193+
self.save_junit(args, test_case)
194+
195+
162196
class TestSuccess(TestResult):
163197
"""Test success."""
164198
def write_junit(self, args):

test/runner/lib/util.py

+22-15
Original file line numberDiff line numberDiff line change
@@ -380,23 +380,30 @@ def raw_command(cmd, capture=False, env=None, data=None, cwd=None, explain=False
380380
stderr = None
381381

382382
start = time.time()
383+
process = None
383384

384385
try:
385-
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
386-
except OSError as ex:
387-
if ex.errno == errno.ENOENT:
388-
raise ApplicationError('Required program "%s" not found.' % cmd[0])
389-
raise
390-
391-
if communicate:
392-
encoding = 'utf-8'
393-
data_bytes = data.encode(encoding, 'surrogateescape') if data else None
394-
stdout_bytes, stderr_bytes = process.communicate(data_bytes)
395-
stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u''
396-
stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u''
397-
else:
398-
process.wait()
399-
stdout_text, stderr_text = None, None
386+
try:
387+
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
388+
except OSError as ex:
389+
if ex.errno == errno.ENOENT:
390+
raise ApplicationError('Required program "%s" not found.' % cmd[0])
391+
raise
392+
393+
if communicate:
394+
encoding = 'utf-8'
395+
data_bytes = data.encode(encoding, 'surrogateescape') if data else None
396+
stdout_bytes, stderr_bytes = process.communicate(data_bytes)
397+
stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u''
398+
stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u''
399+
else:
400+
process.wait()
401+
stdout_text, stderr_text = None, None
402+
finally:
403+
if process and process.returncode is None:
404+
process.kill()
405+
display.info('') # the process we're interrupting may have completed a partial line of output
406+
display.notice('Killed command to avoid an orphaned child process during handling of an unexpected exception.')
400407

401408
status = process.returncode
402409
runtime = time.time() - start

test/utils/shippable/shippable.sh

+7-1
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ function cleanup
116116

117117
trap cleanup EXIT
118118

119-
ansible-test env --dump --show --color -v
119+
if [[ "${COVERAGE:-}" ]]; then
120+
timeout=60
121+
else
122+
timeout=45
123+
fi
124+
125+
ansible-test env --dump --show --timeout "${timeout}" --color -v
120126

121127
"test/utils/shippable/${script}.sh" "${test}"

test/utils/shippable/units.sh

+8
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,13 @@ IFS='/:' read -ra args <<< "$1"
77

88
version="${args[1]}"
99

10+
if [[ "${COVERAGE:-}" ]]; then
11+
timeout=90
12+
else
13+
timeout=10
14+
fi
15+
16+
ansible-test env --timeout "${timeout}" --color -v
17+
1018
# shellcheck disable=SC2086
1119
ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \

0 commit comments

Comments
 (0)