329 changes: 193 additions & 136 deletions easybuild/easyblocks/p/pytorch.py
@@ -30,6 +30,7 @@
 
 import os
 import re
+import sys
 import tempfile
 import easybuild.tools.environment as env
 from easybuild.tools import LooseVersion
@@ -42,6 +43,164 @@
 from easybuild.tools.systemtools import POWER, get_cpu_architecture
 
 
+if sys.version_info >= (3, 9):
+    from typing import NamedTuple
+    FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])])
+    TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])
+    TestResult = NamedTuple('TestResult', [('test_cnt', int),
+                                           ('error_cnt', int),
+                                           ('failure_cnt', int),
+                                           ('failed_suites', list[TestSuiteResult])])
+else:
+    from collections import namedtuple
+    FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail'))
+    TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary'))
+    TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites'))
+
+
+def find_failed_test_names(tests_out):
+    """Find the names of failed test cases in the output of the test step.
+
+    Return sorted lists of names in a FailedTestNames tuple.
+    """
+    # Patterns like:
+    # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) ===
+    # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) ---
+    regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n"
+    failed_test_cases = re.findall(regex, tests_out, re.M)
+    # And patterns like:
+    # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip]
+    # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip]
+    # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip]
+    regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - "
+    failed_test_cases.extend(re.findall(regex, tests_out, re.M))
+    return FailedTestNames(error=sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR'),
+                           fail=sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR'))
+
+
+def parse_test_log(tests_out):
+    """Parse the test output and return the result as a TestResult tuple."""
+
+    def get_count_for_pattern(regex, text):
+        """Match the regex, which must contain a single capture group, and return its integer value.
+        Return zero if there was no match; warn and also return zero if more than one match was found.
+        """
+        match = re.findall(regex, text)
+        if len(match) == 1:
+            return int(match[0])
+        elif len(match) > 1:
+            # Shouldn't happen, but means something went wrong with the regular expressions.
+            # Throw a warning, as the build might be fine; no need to error on this.
+            warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
+            warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
+            print_warning(warn_msg)
+        return 0
+
+    failure_cnt = 0
+    error_cnt = 0
+    failed_suites = []
+
+    # Remove empty lines to make the regexes below simpler
+    tests_out = re.sub(r'^[ \t]*\n', '', tests_out, flags=re.MULTILINE)
+
+    # Grep for patterns like:
+    # Ran 219 tests in 67.325s
+    #
+    # FAILED (errors=10, skipped=190, expected failures=6)
+    # test_fx failed!
+    regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n"
+             r"FAILED \((?P<failure_summary>.*)\)$\n"
+             r"(?:^(?:(?!failed!).)*$\n){0,5}"
+             r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
+        failure_summary = m.group('failure_summary')
+        total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
+        failed_suites.append(
+            TestSuiteResult(test_suite, "{total} total tests, {failure_summary}".format(
+                total=total, failure_summary=failure_summary))
+        )
+        failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
+        error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
+
+    # Grep for patterns like:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
+    # test_quantization failed!
+    # OR:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
+    #
+    # FINISHED PRINTING LOG FILE
+    # test_quantization failed!
+    # OR:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
+    # If in CI, skip info is located in the xml test reports, please either go to s3 or the hud to download them
+    #
+    # FINISHED PRINTING LOG FILE of test_ops_gradients (/tmp/vsc40023/easybuil...)
+    #
+    # test_quantization failed!
+
+    regex = (
+        r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n"
+        r"(?:.*skip info is located in the xml test reports.*\n)?"
+        r"(?:.*FINISHED PRINTING LOG FILE.*\n)?"
+        r"(?P<failed_test_suite_name>.*) failed!$"
+    )
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
+        failure_summary = m.group('failure_summary')
+        test_suite = m.group('failed_test_suite_name')
+        failed_suites.append(TestSuiteResult(test_suite, failure_summary))
+        failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
+        error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
+
+    # Grep for patterns like:
+    # AssertionError: 2 unit test(s) failed:
+    #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail
+    #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail
+    #
+    # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl (<snip>)
+    #
+    # distributed/test_c10d_nccl failed!
+
+    regex = (
+        r"^AssertionError: (?P<failure_summary>[0-9]+ unit test\(s\) failed):\n"
+        r"(\s+.*\n)+"
+        r"(((?!failed!).)*\n){0,5}"
+        r"(?P<failed_test_suite_name>.*) failed!$"
+    )
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. '2 unit test(s) failed'
+        failure_summary = m.group('failure_summary')
+        test_suite = m.group('failed_test_suite_name')
+        failed_suites.append(TestSuiteResult(test_suite, failure_summary))
+        failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary)
+
+    # Collect the total number of tests
+
+    # Pattern for tests run with unittest, like:
+    # Ran 3 tests in 0.387s
+    regex = r"^Ran (?P<test_cnt>[0-9]+) tests in"
+    test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M))
+    # Pattern for tests run with pytest, like:
+    # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============
+    regex = r"=+ (?P<summary>.*) in \d+.* =+\n"
+    count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [
+        "failed",
+        "passed",
+        "skipped",
+        "deselected",
+        "xfailed",
+        "xpassed",
+    ]]
+    for m in re.finditer(regex, tests_out, re.M):
+        test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)
+
+    return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites)
+
+
 class EB_PyTorch(PythonPackage):
     """Support for building/installing PyTorch."""
 
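A note on the helpers above: the version gate exists because subscripted builtin generics such as list[str] in the NamedTuple annotations require Python 3.9 (PEP 585); older interpreters fall back to plain namedtuples with the same field names. To make the parsing contract concrete, the following minimal sketch is an editor's illustration using a fabricated log fragment; it is not part of this diff, and the test names and counts are invented:

    # Illustration only -- fabricated test-step output, not from a real PyTorch run
    sample = (
        "======================================================================\n"
        "FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps)\n"
        "----------------------------------------------------------------------\n"
        "FAILED test_ops.py::TestCommonCPU::test_out_warning_cpu - RuntimeError: ...\n"
        "Ran 219 tests in 67.325s\n"
        "FAILED (failures=1, errors=2, skipped=190)\n"
        "test_fx failed!\n"
    )
    names = find_failed_test_names(sample)
    # names.fail == ['test_add_scalar_relu', 'test_out_warning_cpu']; names.error == []
    result = parse_test_log(sample)
    # result.test_cnt == 219, result.failure_cnt == 1, result.error_cnt == 2
    # result.failed_suites == [TestSuiteResult(name='test_fx',
    #     summary='219 total tests, failures=1, errors=2, skipped=190')]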
@@ -283,126 +442,21 @@ def test_step(self):
         tests_out, tests_ec = test_result
 
         # Show failed subtests to aid in debugging failures
-        # I.e. patterns like
-        # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) ===
-        # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) ---
-        regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n"
-        failed_test_cases = re.findall(regex, tests_out, re.M)
-        # And patterns like:
-        # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip]
-        # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip]
-        # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip]
-        regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - "
-        failed_test_cases.extend(re.findall(regex, tests_out, re.M))
-        if failed_test_cases:
-            errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR')
-            failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR')
+        failed_test_names = find_failed_test_names(tests_out)
+        if failed_test_names.error or failed_test_names.fail:
             msg = []
-            if errored_test_cases:
+            if failed_test_names.error:
                 msg.append("Found %d individual tests that exited with an error: %s"
-                           % (len(errored_test_cases), ', '.join(errored_test_cases)))
-            if failed_test_cases:
+                           % (len(failed_test_names.error), ', '.join(failed_test_names.error)))
+            if failed_test_names.fail:
                 msg.append("Found %d individual tests with failed assertions: %s"
-                           % (len(failed_test_cases), ', '.join(failed_test_cases)))
+                           % (len(failed_test_names.fail), ', '.join(failed_test_names.fail)))
             self.log.warning("\n".join(msg))
 
-        def get_count_for_pattern(regex, text):
-            """Match the regexp containing a single group and return the integer value of the matched group.
-            Return zero if no or more than 1 match was found and warn for the latter case
-            """
-            match = re.findall(regex, text)
-            if len(match) == 1:
-                return int(match[0])
-            elif len(match) > 1:
-                # Shouldn't happen, but means something went wrong with the regular expressions.
-                # Throw warning, as the build might be fine, no need to error on this.
-                warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
-                warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
-                print_warning(warn_msg)
-            return 0
-
-        # Create clear summary report
-        failure_report = []
-        failure_cnt = 0
-        error_cnt = 0
-        failed_test_suites = []
-
-        # Grep for patterns like:
-        # Ran 219 tests in 67.325s
-        #
-        # FAILED (errors=10, skipped=190, expected failures=6)
-        # test_fx failed!
-        regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
-                 r"FAILED \((?P<failure_summary>.*)\)$\n"
-                 r"(?:^(?:(?!failed!).)*$\n){0,5}"
-                 r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
-            failure_summary = m.group('failure_summary')
-            total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
-            failure_report.append("{test_suite} ({total} total tests, {failure_summary})".format(
-                test_suite=test_suite, total=total, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
-            error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Grep for patterns like:
-        # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
-        # test_quantization failed!
-        # OR:
-        # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
-        # FINISHED PRINTING LOG FILE
-        #
-        # test_quantization failed!
-
-        regex = (
-            r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n"
-            r"(?:.*FINISHED PRINTING LOG FILE.*\n)?"
-            r"(?:^\s*\n)*"
-            r"(?P<failed_test_suite_name>.*) failed!$"
-        )
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
-            failure_summary = m.group('failure_summary')
-            test_suite = m.group('failed_test_suite_name')
-            failure_report.append("{test_suite} ({failure_summary})".format(
-                test_suite=test_suite, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
-            error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Grep for patterns like:
-        # AssertionError: 2 unit test(s) failed:
-        #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail
-        #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail
-        #
-        # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl (<snip>)
-        #
-        # distributed/test_c10d_nccl failed!
-
-        regex = (
-            r"^AssertionError: (?P<failure_summary>[0-9]+ unit test\(s\) failed):\n"
-            r"(\s+.*\n)+"
-            r"(((?!failed!).)*\n){0,5}"
-            r"(?P<failed_test_suite_name>.*) failed!$"
-        )
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. '2 unit test(s) failed'
-            failure_summary = m.group('failure_summary')
-            test_suite = m.group('failed_test_suite_name')
-            failure_report.append("{test_suite} ({failure_summary})".format(
-                test_suite=test_suite, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Make the names unique
-        failed_test_suites = set(failed_test_suites)
+        test_result = parse_test_log(tests_out)
+        failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites]
+        failed_test_suites = set(suite.name for suite in test_result.failed_suites)
         # Gather all failed test suites in case we missed any (e.g. when it exited due to syntax errors)
         # Also unique to be able to compare the lists below
         all_failed_test_suites = set(
@@ -419,32 +473,17 @@ def get_count_for_pattern(regex, text):
         failure_report = '\n'.join(failure_report)
 
         # Calculate the number of unsuccessful tests and the total number of tests
-        failed_test_cnt = failure_cnt + error_cnt
-        # Pattern for tests ran with unittest like:
-        # Ran 3 tests in 0.387s
-        regex = r"^Ran (?P<test_cnt>[0-9]+) tests in"
-        test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M))
-        # Pattern for tests ran with pytest like:
-        # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============
-        regex = r"=+ (?P<summary>.*) in \d+.* =+\n"
-        count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [
-            "failed",
-            "passed",
-            "skipped",
-            "deselected",
-            "xfailed",
-            "xpassed",
-        ]]
-        for m in re.finditer(regex, tests_out, re.M):
-            test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)
+        failed_test_cnt = test_result.failure_cnt + test_result.error_cnt
 
         if failed_test_cnt > 0:
             max_failed_tests = self.cfg['max_failed_tests']
 
-            failure_or_failures = 'failure' if failure_cnt == 1 else 'failures'
-            error_or_errors = 'error' if error_cnt == 1 else 'errors'
+            failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures'
+            error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors'
             msg = "%d test %s, %d test %s (out of %d):\n" % (
-                failure_cnt, failure_or_failures, error_cnt, error_or_errors, test_cnt
+                test_result.failure_cnt, failure_or_failures,
+                test_result.error_cnt, error_or_errors,
+                test_result.test_cnt
             )
             msg += failure_report
 
@@ -470,7 +509,7 @@ def get_count_for_pattern(regex, text):
                 raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
                                      failed_test_cnt, max_failed_tests)
         elif failure_report:
-            raise EasyBuildError("Test command had non-zero exit code (%s)!\n%s", tests_ec, failure_report)
+            raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report)
         elif tests_ec:
             raise EasyBuildError("Test command had non-zero exit code (%s), but no failed tests found?!", tests_ec)
 
@@ -502,3 +541,21 @@ def make_module_req_guess(self):
         # Required to dynamically load libcaffe2_nvrtc.so
         guesses['LD_LIBRARY_PATH'] = [os.path.join(self.pylibdir, 'torch', 'lib')]
         return guesses
+
+
+if __name__ == '__main__':
+    arg = sys.argv[1]
+    if not os.path.isfile(arg):
+        raise RuntimeError('Expected a test result file to parse, got: ' + arg)
+    with open(arg, 'r') as f:
+        content = f.read()
+    m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
+    if m:
+        content = content[m.end():]
+    # Heuristic for the next possible text added by EasyBuild (re.M so '^' matches at any line start)
+    m = re.search(r'^== \d+-\d+-\d+ .* (pytorch\.py|EasyBuild)', content, re.M)
+    if m:
+        content = content[:m.start()]
+
+    print("Failed test names: ", find_failed_test_names(content))
+    print("Test result: ", parse_test_log(content))