From 29eb6c07b239cda085379ba32f8286a095226f02 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 22 Jan 2024 10:12:57 +0100 Subject: [PATCH 1/5] Factor out test log parsing from pytorch test method Allow easier testing by invoking it with some string --- easybuild/easyblocks/p/pytorch.py | 301 ++++++++++++++++-------------- 1 file changed, 165 insertions(+), 136 deletions(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index 62172fca10d..744fecc1d61 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -30,6 +30,7 @@ import os import re +import sys import tempfile import easybuild.tools.environment as env from easybuild.tools import LooseVersion @@ -42,6 +43,154 @@ from easybuild.tools.systemtools import POWER, get_cpu_architecture +if sys.version_info >= (3, 6): + from typing import NamedTuple + FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])]) + TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)]) + TestResult = NamedTuple('TestResult', [('test_cnt', int), + ('error_cnt', int), + ('failure_cnt', int), + ('failed_suites', list[TestSuiteResult])]) +else: + from collections import namedtuple + FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail')) + TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary')) + TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites')) + + +def find_failed_test_names(tests_out): + """Find failed names of failed test cases in the output of the test step + + Return sorted list of names in FailedTestNames tuple + """ + # patterns like + # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) === + # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) --- + regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n" + failed_test_cases = re.findall(regex, tests_out, re.M) + # And patterns like: + # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip] + # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip] + # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip] + regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - " + failed_test_cases.extend(re.findall(regex, tests_out, re.M)) + return FailedTestNames(error=sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR'), + fail=sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR')) + + +def parse_test_log(tests_out): + """Parse the test output and return result as TestResult tuple""" + + def get_count_for_pattern(regex, text): + """Match the regexp containing a single group and return the integer value of the matched group. + Return zero if no or more than 1 match was found and warn for the latter case + """ + match = re.findall(regex, text) + if len(match) == 1: + return int(match[0]) + elif len(match) > 1: + # Shouldn't happen, but means something went wrong with the regular expressions. + # Throw warning, as the build might be fine, no need to error on this. + warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n" + warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable." + print_warning(warn_msg) + return 0 + + failure_cnt = 0 + error_cnt = 0 + failed_suites = [] + + # Grep for patterns like: + # Ran 219 tests in 67.325s + # + # FAILED (errors=10, skipped=190, expected failures=6) + # test_fx failed! + regex = (r"^Ran (?P[0-9]+) tests.*$\n\n" + r"FAILED \((?P.*)\)$\n" + r"(?:^(?:(?!failed!).)*$\n){0,5}" + r"(?P.*) failed!(?: Received signal: \w+)?\s*$") + + for m in re.finditer(regex, tests_out, re.M): + # E.g. 'failures=3, errors=10, skipped=190, expected failures=6' + failure_summary = m.group('failure_summary') + total, test_suite = m.group('test_cnt', 'failed_test_suite_name') + failed_suites.append( + TestSuiteResult(test_suite, "{total} total tests, {failure_summary}".format( + total=total, failure_summary=failure_summary)) + ) + failure_cnt += get_count_for_pattern(r"(?.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n" + r"(?:.*FINISHED PRINTING LOG FILE.*\n)?" + r"(?:^\s*\n)*" + r"(?P.*) failed!$" + ) + + for m in re.finditer(regex, tests_out, re.M): + # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings' + failure_summary = m.group('failure_summary') + test_suite = m.group('failed_test_suite_name') + failed_suites.append(TestSuiteResult(test_suite, failure_summary)) + failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary) + error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary) + + # Grep for patterns like: + # AssertionError: 2 unit test(s) failed: + # DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail + # DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail + # + # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl () + # + # distributed/test_c10d_nccl failed! + + regex = ( + r"^AssertionError: (?P[0-9]+ unit test\(s\) failed):\n" + r"(\s+.*\n)+" + r"(((?!failed!).)*\n){0,5}" + r"(?P.*) failed!$" + ) + + for m in re.finditer(regex, tests_out, re.M): + # E.g. '2 unit test(s) failed' + failure_summary = m.group('failure_summary') + test_suite = m.group('failed_test_suite_name') + failed_suites.append(TestSuiteResult(test_suite, failure_summary)) + failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary) + + # Collect total number of tests + + # Pattern for tests ran with unittest like: + # Ran 3 tests in 0.387s + regex = r"^Ran (?P[0-9]+) tests in" + test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M)) + # Pattern for tests ran with pytest like: + # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============ + regex = r"=+ (?P.*) in \d+.* =+\n" + count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [ + "failed", + "passed", + "skipped", + "deselected", + "xfailed", + "xpassed", + ]] + for m in re.finditer(regex, tests_out, re.M): + test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns) + + return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites) + + class EB_PyTorch(PythonPackage): """Support for building/installing PyTorch.""" @@ -283,126 +432,21 @@ def test_step(self): tests_out, tests_ec = test_result # Show failed subtests to aid in debugging failures - # I.e. patterns like - # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) === - # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) --- - regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n" - failed_test_cases = re.findall(regex, tests_out, re.M) - # And patterns like: - # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip] - # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip] - # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip] - regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - " - failed_test_cases.extend(re.findall(regex, tests_out, re.M)) - if failed_test_cases: - errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR') - failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR') + failed_test_names = find_failed_test_names(tests_out) + if failed_test_names.error or failed_test_names.fail: msg = [] - if errored_test_cases: + if failed_test_names.error: msg.append("Found %d individual tests that exited with an error: %s" - % (len(errored_test_cases), ', '.join(errored_test_cases))) - if failed_test_cases: + % (len(failed_test_names.error), ', '.join(failed_test_names.error))) + if failed_test_names.fail: msg.append("Found %d individual tests with failed assertions: %s" - % (len(failed_test_cases), ', '.join(failed_test_cases))) + % (len(failed_test_names.fail), ', '.join(failed_test_names.fail))) self.log.warning("\n".join(msg)) - def get_count_for_pattern(regex, text): - """Match the regexp containing a single group and return the integer value of the matched group. - Return zero if no or more than 1 match was found and warn for the latter case - """ - match = re.findall(regex, text) - if len(match) == 1: - return int(match[0]) - elif len(match) > 1: - # Shouldn't happen, but means something went wrong with the regular expressions. - # Throw warning, as the build might be fine, no need to error on this. - warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n" - warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable." - print_warning(warn_msg) - return 0 - # Create clear summary report - failure_report = [] - failure_cnt = 0 - error_cnt = 0 - failed_test_suites = [] - - # Grep for patterns like: - # Ran 219 tests in 67.325s - # - # FAILED (errors=10, skipped=190, expected failures=6) - # test_fx failed! - regex = (r"^Ran (?P[0-9]+) tests.*$\n\n" - r"FAILED \((?P.*)\)$\n" - r"(?:^(?:(?!failed!).)*$\n){0,5}" - r"(?P.*) failed!(?: Received signal: \w+)?\s*$") - - for m in re.finditer(regex, tests_out, re.M): - # E.g. 'failures=3, errors=10, skipped=190, expected failures=6' - failure_summary = m.group('failure_summary') - total, test_suite = m.group('test_cnt', 'failed_test_suite_name') - failure_report.append("{test_suite} ({total} total tests, {failure_summary})".format( - test_suite=test_suite, total=total, failure_summary=failure_summary - )) - failure_cnt += get_count_for_pattern(r"(?.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n" - r"(?:.*FINISHED PRINTING LOG FILE.*\n)?" - r"(?:^\s*\n)*" - r"(?P.*) failed!$" - ) - - for m in re.finditer(regex, tests_out, re.M): - # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings' - failure_summary = m.group('failure_summary') - test_suite = m.group('failed_test_suite_name') - failure_report.append("{test_suite} ({failure_summary})".format( - test_suite=test_suite, failure_summary=failure_summary - )) - failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary) - error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary) - failed_test_suites.append(test_suite) - - # Grep for patterns like: - # AssertionError: 2 unit test(s) failed: - # DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail - # DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail - # - # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl () - # - # distributed/test_c10d_nccl failed! - - regex = ( - r"^AssertionError: (?P[0-9]+ unit test\(s\) failed):\n" - r"(\s+.*\n)+" - r"(((?!failed!).)*\n){0,5}" - r"(?P.*) failed!$" - ) - - for m in re.finditer(regex, tests_out, re.M): - # E.g. '2 unit test(s) failed' - failure_summary = m.group('failure_summary') - test_suite = m.group('failed_test_suite_name') - failure_report.append("{test_suite} ({failure_summary})".format( - test_suite=test_suite, failure_summary=failure_summary - )) - failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary) - failed_test_suites.append(test_suite) - - # Make the names unique - failed_test_suites = set(failed_test_suites) + test_result = parse_test_log(tests_out) + failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites] + failed_test_suites = set(suite.name for suite in test_result.failed_suites) # Gather all failed tests suites in case we missed any (e.g. when it exited due to syntax errors) # Also unique to be able to compare the lists below all_failed_test_suites = set( @@ -419,32 +463,17 @@ def get_count_for_pattern(regex, text): failure_report = '\n'.join(failure_report) # Calculate total number of unsuccesful and total tests - failed_test_cnt = failure_cnt + error_cnt - # Pattern for tests ran with unittest like: - # Ran 3 tests in 0.387s - regex = r"^Ran (?P[0-9]+) tests in" - test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M)) - # Pattern for tests ran with pytest like: - # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============ - regex = r"=+ (?P.*) in \d+.* =+\n" - count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [ - "failed", - "passed", - "skipped", - "deselected", - "xfailed", - "xpassed", - ]] - for m in re.finditer(regex, tests_out, re.M): - test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns) + failed_test_cnt = test_result.failure_cnt + test_result.error_cnt if failed_test_cnt > 0: max_failed_tests = self.cfg['max_failed_tests'] - failure_or_failures = 'failure' if failure_cnt == 1 else 'failures' - error_or_errors = 'error' if error_cnt == 1 else 'errors' + failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures' + error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors' msg = "%d test %s, %d test %s (out of %d):\n" % ( - failure_cnt, failure_or_failures, error_cnt, error_or_errors, test_cnt + test_result.failure_cnt, failure_or_failures, + test_result.error_cnt, error_or_errors, + test_result.test_cnt ) msg += failure_report @@ -470,7 +499,7 @@ def get_count_for_pattern(regex, text): raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d", failed_test_cnt, max_failed_tests) elif failure_report: - raise EasyBuildError("Test command had non-zero exit code (%s)!\n%s", tests_ec, failure_report) + raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report) elif tests_ec: raise EasyBuildError("Test command had non-zero exit code (%s), but no failed tests found?!", tests_ec) From 7e2ba44b8abcca456e1b7ad6014b595a07d27a7f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 22 Jan 2024 10:32:30 +0100 Subject: [PATCH 2/5] Allow invoking pytorch easyblock file with test log Also accepts a full build log in which case it tries to find the test output --- easybuild/easyblocks/p/pytorch.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index 744fecc1d61..b0e69e1c95e 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -531,3 +531,21 @@ def make_module_req_guess(self): # Required to dynamically load libcaffe2_nvrtc.so guesses['LD_LIBRARY_PATH'] = [os.path.join(self.pylibdir, 'torch', 'lib')] return guesses + + +if __name__ == '__main__': + arg = sys.argv[1] + if not os.path.isfile(arg): + raise RuntimeError('Expected a test result file to parse, got: ' + arg) + with open(arg, 'r') as f: + content = f.read() + m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content) + if m: + content = content[m.end():] + # Heuristic for next possible text added by EasyBuild + m = re.search(r'^== \d+-\d+-\d+ .* (pytorch\.py|EasyBuild)', content) + if m: + content = content[:m.start()] + + print("Failed test names: ", find_failed_test_names(content)) + print("Test result: ", parse_test_log(content)) From 81cb0bd611c44faa37505968d97674018d11d5ec Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 22 Jan 2024 10:59:13 +0100 Subject: [PATCH 3/5] Make error matching more resistant against new lines Remove empty lines before matching --- easybuild/easyblocks/p/pytorch.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index b0e69e1c95e..8888c7fdeff 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -100,12 +100,15 @@ def get_count_for_pattern(regex, text): error_cnt = 0 failed_suites = [] + # Remove empty lines to make RegExs below simpler + tests_out = re.sub(r'^[ \t]*\n', '', tests_out, flags=re.MULTILINE) + # Grep for patterns like: # Ran 219 tests in 67.325s # # FAILED (errors=10, skipped=190, expected failures=6) # test_fx failed! - regex = (r"^Ran (?P[0-9]+) tests.*$\n\n" + regex = (r"^Ran (?P[0-9]+) tests.*$\n" r"FAILED \((?P.*)\)$\n" r"(?:^(?:(?!failed!).)*$\n){0,5}" r"(?P.*) failed!(?: Received signal: \w+)?\s*$") @@ -133,7 +136,6 @@ def get_count_for_pattern(regex, text): regex = ( r"^=+ (?P.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n" r"(?:.*FINISHED PRINTING LOG FILE.*\n)?" - r"(?:^\s*\n)*" r"(?P.*) failed!$" ) From 63941ce897ae46a41b5d47bfe66a6ddd485ee16b Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 22 Jan 2024 10:59:41 +0100 Subject: [PATCH 4/5] Match test results from e.g. PyTorch 1.13 There is an extra line "If in CI, skip info is located in the xml test reports, please either go to s3 or the hud to download them" which throws of the matching. Include that. --- easybuild/easyblocks/p/pytorch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index 8888c7fdeff..7cf6580907f 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -129,12 +129,20 @@ def get_count_for_pattern(regex, text): # test_quantization failed! # OR: # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) ========= + # # FINISHED PRINTING LOG FILE + # test_quantization failed! + # OR: + # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) ========= + # If in CI, skip info is located in the xml test reports, please either go to s3 or the hud to download them + # + # FINISHED PRINTING LOG FILE of test_ops_gradients (/tmp/vsc40023/easybuil...) # # test_quantization failed! regex = ( r"^=+ (?P.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n" + r"(?:.*skip info is located in the xml test reports.*\n)?" r"(?:.*FINISHED PRINTING LOG FILE.*\n)?" r"(?P.*) failed!$" ) From 5883275bc75d0af4d5d5e686fad9707990f38b50 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 22 Jan 2024 11:18:08 +0100 Subject: [PATCH 5/5] Use type hints for Python 3.9+ The type hint for lists isn't valid until 3.9 --- easybuild/easyblocks/p/pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index 7cf6580907f..c2242ab41fb 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -43,7 +43,7 @@ from easybuild.tools.systemtools import POWER, get_cpu_architecture -if sys.version_info >= (3, 6): +if sys.version_info >= (3, 9): from typing import NamedTuple FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])]) TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])