diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 13136e63e7e..7898be4d62f 100644
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -44,18 +44,41 @@ if sys.version_info >= (3, 9):
-    from typing import NamedTuple
-    FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])])
-    TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])
-    TestResult = NamedTuple('TestResult', [('test_cnt', int),
-                                           ('error_cnt', int),
-                                           ('failure_cnt', int),
-                                           ('failed_suites', list[TestSuiteResult])])
+    from dataclasses import dataclass
+
+    @dataclass
+    class FailedTestNames:
+        """Hold lists of test names that failed with an error or a failure"""
+        error: list[str]
+        fail: list[str]
+
+    @dataclass
+    class TestSuiteResult:
+        """Hold the name of a test suite and a summary of the failures"""
+        name: str
+        summary: str
+
+    @dataclass
+    class TestResult:
+        """Status report and results of a test run"""
+        test_cnt: int
+        error_cnt: int
+        failure_cnt: int
+        failed_suites: list[TestSuiteResult]
+        terminated_suites: dict[str, str]  # Name and signal of terminated suites
+        all_failed_suites: set[str]  # Names of all failed suites
 else:
     from collections import namedtuple
     FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail'))
     TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary'))
-    TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites'))
+    TerminatedTestSuite = namedtuple('TerminatedTestSuite', ('name', 'signal'))
+    TestResult = namedtuple('TestResult', ('test_cnt',
+                                           'error_cnt',
+                                           'failure_cnt',
+                                           'failed_suites',
+                                           'terminated_suites',
+                                           'all_failed_suites'
+                                           ))
 
 
 def find_failed_test_names(tests_out):
@@ -198,7 +221,17 @@ def get_count_for_pattern(regex, text):
     for m in re.finditer(regex, tests_out, re.M):
         test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)
 
-    return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites)
+    # Gather all failed test suites in case we missed any,
+    # e.g. when a suite exited due to syntax errors or with a signal such as SIGSEGV
+    failed_suites_and_signal = set(
+        re.findall(r"^(?P<name>.*) failed!(?: Received signal: (\w+))?\s*$", tests_out, re.M)
+    )
+
+    return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt,
+                      failed_suites=failed_suites,
+                      # Assumes that the suite name is unique
+                      terminated_suites={name: signal for name, signal in failed_suites_and_signal if signal},
+                      all_failed_suites={i[0] for i in failed_suites_and_signal})
 
 
 class EB_PyTorch(PythonPackage):
@@ -462,17 +495,17 @@ def test_step(self):
             'excluded_tests': ' '.join(excluded_tests)
         })
 
-        test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
-        if test_result is None:
+        parsed_test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
+        if parsed_test_result is None:
             if self.cfg['runtest'] is False:
                 msg = "Do not set 'runtest' to False, use --skip-test-step instead."
             else:
                 msg = "Tests did not run. Make sure 'runtest' is set to a command."
             raise EasyBuildError(msg)
 
-        tests_out, tests_ec = test_result
+        tests_out, tests_ec = parsed_test_result
 
-        # Show failed subtests to aid in debugging failures
+        # Show failed subtests, if any, to aid in debugging failures
         failed_test_names = find_failed_test_names(tests_out)
         if failed_test_names.error or failed_test_names.fail:
             msg = []
@@ -485,60 +518,92 @@ def test_step(self):
             self.log.warning("\n".join(msg))
 
         # Create clear summary report
-        test_result = parse_test_log(tests_out)
-        failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites]
-        failed_test_suites = set(suite.name for suite in test_result.failed_suites)
-        # Gather all failed tests suites in case we missed any (e.g. when it exited due to syntax errors)
-        # Also unique to be able to compare the lists below
-        all_failed_test_suites = set(
-            re.findall(r"^(?P<name>.*) failed!(?: Received signal: \w+)?\s*$", tests_out, re.M)
-        )
-        # If we missed any test suites prepend a list of all failed test suites
+        parsed_test_result = parse_test_log(tests_out)
+        # Use a list of messages we can later join together
+        failure_msgs = ['\t%s (%s)' % (suite.name, suite.summary) for suite in parsed_test_result.failed_suites]
+        # These were accounted for
+        failed_test_suites = set(suite.name for suite in parsed_test_result.failed_suites)
+        # These are all the suites that failed according to the summary output
+        all_failed_test_suites = parsed_test_result.all_failed_suites
+        # We should have determined all failed test suites and only those.
+        # Otherwise show the mismatch and terminate later
         if failed_test_suites != all_failed_test_suites:
-            failure_report = ['Failed tests (suites/files):'] + failure_report
+            failure_msgs.insert(0, 'Failed tests (suites/files):')
             # Test suites where we didn't match a specific regexp and hence likely didn't count the failures
-            failure_report.extend('+ %s' % t for t in sorted(all_failed_test_suites - failed_test_suites))
+            uncounted_test_suites = all_failed_test_suites - failed_test_suites
+            if uncounted_test_suites:
+                failure_msgs.append('Could not count failed tests for the following test suites/files:')
+                for suite_name in sorted(uncounted_test_suites):
+                    try:
+                        signal = parsed_test_result.terminated_suites[suite_name]
+                        reason = f'Terminated with {signal}'
+                    except KeyError:
+                        # Did not end with a signal; might have failed due to e.g. syntax errors
+                        reason = 'Undetected or did not run properly'
+                    failure_msgs.append(f'\t{suite_name} ({reason})')
             # Test suites not included in the catch-all regexp but counted. Should be empty.
-            failure_report.extend('? %s' % t for t in sorted(failed_test_suites - all_failed_test_suites))
-
-        failure_report = '\n'.join(failure_report)
+            unexpected_test_suites = failed_test_suites - all_failed_test_suites
+            if unexpected_test_suites:
+                failure_msgs.append('Counted failures of tests from the following test suites/files that are not '
+                                    'contained in the summary output of PyTorch:')
+                failure_msgs.extend(sorted(unexpected_test_suites))
 
         # Calculate total number of unsuccessful and total tests
-        failed_test_cnt = test_result.failure_cnt + test_result.error_cnt
+        failed_test_cnt = parsed_test_result.failure_cnt + parsed_test_result.error_cnt
+        # Only add the count message if we detected any failed tests
+        if failed_test_cnt > 0:
+            failure_or_failures = 'failure' if parsed_test_result.failure_cnt == 1 else 'failures'
+            error_or_errors = 'error' if parsed_test_result.error_cnt == 1 else 'errors'
+            failure_msgs.insert(0, "%d test %s, %d test %s (out of %d):" % (
+                parsed_test_result.failure_cnt, failure_or_failures,
+                parsed_test_result.error_cnt, error_or_errors,
+                parsed_test_result.test_cnt
+            ))
+
+        # Assemble final report
+        failure_report = '\n'.join(failure_msgs)
+
+        if failed_test_suites != all_failed_test_suites:
+            # Fail because we can't be sure how many tests failed,
+            # so comparing to max_failed_tests cannot reasonably be done
+            if failed_test_suites | set(parsed_test_result.terminated_suites) == all_failed_test_suites:
+                # All failed test suites are either counted or terminated with a signal
+                msg = ('Failing because these test suites were terminated, which makes it impossible '
+                       'to accurately count the failed tests: ')
+                msg += ", ".join("%s(%s)" % name_signal
+                                 for name_signal in sorted(parsed_test_result.terminated_suites.items()))
+            elif len(failed_test_suites) < len(all_failed_test_suites):
+                msg = ('Failing because not all failed tests could be determined. '
+                       'Tests failed to start or the test accounting in the PyTorch EasyBlock needs updating!\n'
+                       'Missing: ' + ', '.join(sorted(all_failed_test_suites - failed_test_suites)))
+            else:
+                msg = ('Failing because there were unexpected failures detected: ' +
+                       ', '.join(sorted(failed_test_suites - all_failed_test_suites)))
+            raise EasyBuildError(msg + '\n' +
+                                 'You can check the test failures (in the log) manually and if they are harmless, '
+                                 'use --ignore-test-failures to make the test step pass.\n' + failure_report)
 
         if failed_test_cnt > 0:
             max_failed_tests = self.cfg['max_failed_tests']
-            failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures'
-            error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors'
-            msg = "%d test %s, %d test %s (out of %d):\n" % (
-                test_result.failure_cnt, failure_or_failures,
-                test_result.error_cnt, error_or_errors,
-                test_result.test_cnt
-            )
-            msg += failure_report
-
-            # If no tests are supposed to fail or some failed for which we were not able to count errors fail now
-            if max_failed_tests == 0 or failed_test_suites != all_failed_test_suites:
-                raise EasyBuildError(msg)
-            else:
-                msg += '\n\n' + ' '.join([
-                    "The PyTorch test suite is known to include some flaky tests,",
-                    "which may fail depending on the specifics of the system or the context in which they are run.",
-                    "For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
-                    "We recommend to double check that the failing tests listed above ",
-                    "are known to be flaky, or do not affect your intended usage of PyTorch.",
-                    "In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
-                ])
-                # Print to console, the user should really be aware that we are accepting failing tests here...
-                print_warning(msg)
-
-                # Also log this warning in the file log
-                self.log.warning(msg)
-
-                if failed_test_cnt > max_failed_tests:
-                    raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
-                                         failed_test_cnt, max_failed_tests)
+            # If no tests are supposed to fail, don't print the explanation, just fail
+            if max_failed_tests == 0:
+                raise EasyBuildError(failure_report)
+            msg = failure_report + '\n\n' + ' '.join([
+                "The PyTorch test suite is known to include some flaky tests,",
+                "which may fail depending on the specifics of the system or the context in which they are run.",
+                "For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
+                "We recommend double-checking that the failing tests listed above",
+                "are known to be flaky, or do not affect your intended usage of PyTorch.",
+                "In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
+            ])
+            # Print to console in addition to the log file;
+            # the user should really be aware that we are accepting failing tests here...
+            print_warning(msg, log=self.log)
+
+            if failed_test_cnt > max_failed_tests:
+                raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
+                                     failed_test_cnt, max_failed_tests)
         elif failure_report:
             raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report)
         elif tests_ec:
@@ -576,11 +641,11 @@ def make_module_req_guess(self):
         return guesses
 
 
-if __name__ == '__main__':
-    arg = sys.argv[1]
-    if not os.path.isfile(arg):
-        raise RuntimeError('Expected a test result file to parse, got: ' + arg)
-    with open(arg, 'r') as f:
+def parse_logfile(file):
+    """Parse the EasyBuild log file and print the failed tests"""
+    if not os.path.isfile(file):
+        raise RuntimeError('Expected a test result file to parse, got: ' + file)
+    with open(file, 'r') as f:
         content = f.read()
     m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
     if m:
@@ -592,3 +657,7 @@ def make_module_req_guess(self):
     print("Failed test names: ", find_failed_test_names(content))
     print("Test result: ", parse_test_log(content))
+
+
+if __name__ == '__main__':
+    parse_logfile(sys.argv[1])
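
Note (illustrative sketch, not part of the patch): the catch-all regex introduced in `parse_test_log()` drives both new `TestResult` fields, so it is worth seeing on concrete input. The snippet below replays that classification on sample output; the suite names, signals, and the `sample_out` variable are invented for illustration only.

```python
import re

# Invented sample of PyTorch test-run summary lines (not real output)
sample_out = "\n".join([
    "test_autograd failed!",
    "test_jit failed! Received signal: SIGSEGV",
    "distributed/test_c10d failed! Received signal: SIGIOT",
])

# Same catch-all pattern as in parse_test_log() above
failed_suites_and_signal = set(
    re.findall(r"^(?P<name>.*) failed!(?: Received signal: (\w+))?\s*$", sample_out, re.M)
)

# re.findall() yields one tuple per match; the optional signal group is '' when absent.
# Suites terminated by a signal are kept together with the signal's name...
terminated_suites = {name: signal for name, signal in failed_suites_and_signal if signal}
# ...while every failed suite is collected by name, whether its failures were counted or not
all_failed_suites = {name for name, _ in failed_suites_and_signal}

print(terminated_suites)  # e.g. {'test_jit': 'SIGSEGV', 'distributed/test_c10d': 'SIGIOT'}
print(all_failed_suites)  # e.g. {'test_autograd', 'test_jit', 'distributed/test_c10d'}
```

`test_step()` then compares `all_failed_suites` against the suites whose failures it actually counted: suites present only in `all_failed_suites` become "Could not count failed tests ..." entries (annotated with the signal from `terminated_suites` when one was recorded), and the build fails outright in that case, since `max_failed_tests` cannot be checked reliably. The refactored `__main__` entry point exposes the same parsing standalone, e.g. `python pytorch.py <logfile>`.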