From 0b22da9516f40335bed97cee12988eb8354c0e19 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 10 Aug 2023 09:53:35 +0200 Subject: [PATCH 1/4] Print individual failed PyTorch tests --- easybuild/easyblocks/p/pytorch.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index d7a18e59982..5c88c47522f 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -280,6 +280,24 @@ def test_step(self): tests_out, tests_ec = test_result + # Show failed subtests to aid in debugging failures + # I.e. patterns like + # FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) + # ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) + regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n" + failed_test_cases = re.findall(regex, tests_out, re.M) + if failed_test_cases: + errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR') + failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'FAIL') + msg = [] + if errored_test_cases: + msg.append("Found %d individual tests that exited with an error: %s" + % (len(errored_test_cases), ', '.join(errored_test_cases))) + if failed_test_cases: + msg.append("Found %d individual tests with failed assertions: %s" + % (len(failed_test_cases), ', '.join(failed_test_cases))) + self.log.warning("\n".join(msg)) + def get_count_for_pattern(regex, text): """Match the regexp containing a single group and return the integer value of the matched group. 
Return zero if no or more than 1 match was found and warn for the latter case From c2bde44a7f2c3f71a2a146c2f8e1b9acca1d68d4 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 15 Aug 2023 17:54:09 +0200 Subject: [PATCH 2/4] Updated test error RegExs --- easybuild/easyblocks/p/pytorch.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index 5c88c47522f..b6dcaf31c79 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -282,13 +282,17 @@ def test_step(self): # Show failed subtests to aid in debugging failures # I.e. patterns like - # FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) - # ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) + # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) === + # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) --- regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n" failed_test_cases = re.findall(regex, tests_out, re.M) + # And patterns like: + # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip] + regex = r"^(FAILED): \w+\.py.*::(test_.*?) 
- " + failed_test_cases.extend(re.findall(regex, tests_out, re.M)) if failed_test_cases: errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR') - failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'FAIL') + failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR') msg = [] if errored_test_cases: msg.append("Found %d individual tests that exited with an error: %s" @@ -296,7 +300,7 @@ def test_step(self): if failed_test_cases: msg.append("Found %d individual tests with failed assertions: %s" % (len(failed_test_cases), ', '.join(failed_test_cases))) - self.log.warning("\n".join(msg)) + print("\n".join(msg)) def get_count_for_pattern(regex, text): """Match the regexp containing a single group and return the integer value of the matched group. @@ -326,7 +330,7 @@ def get_count_for_pattern(regex, text): # test_fx failed! regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n" r"FAILED \((?P<failure_summary>.*)\)$\n" - r"(?:^(?:(?!failed!).)*$\n)*" + r"(?:^(?:(?!failed!).)*$\n){0,5}" r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$") for m in re.finditer(regex, tests_out, re.M): @@ -342,7 +346,17 @@ def get_count_for_pattern(regex, text): # Grep for patterns like: # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s ===================== - regex = r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* =+$\n(?P<failed_test_suite_name>.*) failed!$" + # test_quantization failed! + # OR: + # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) ========= + # FINISHED PRINTING LOG FILE + # test_quantization failed! + + regex = ( + r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n" + r"(?:^(?:(?!failed!).)*$\n){0,5}" + r"(?P<failed_test_suite_name>.*) failed!$" + ) for m in re.finditer(regex, tests_out, re.M): # E.g. 
'2 failed, 128 passed, 2 skipped, 2 warnings' From 85b94b8f912f93e2b68044483b816dd37921c928 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 16 Aug 2023 10:14:45 +0200 Subject: [PATCH 3/4] Remove extra colon --- easybuild/easyblocks/p/pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index b6dcaf31c79..c817e7068eb 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -288,7 +288,7 @@ def test_step(self): failed_test_cases = re.findall(regex, tests_out, re.M) # And patterns like: # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip] - regex = r"^(FAILED): \w+\.py.*::(test_.*?) - " + regex = r"^(FAILED) \w+\.py.*::(test_.*?) - " failed_test_cases.extend(re.findall(regex, tests_out, re.M)) if failed_test_cases: errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR') From 82e594002a378bb988396b703257f88579875c28 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 24 Aug 2023 17:53:46 +0200 Subject: [PATCH 4/4] Change back `print` to `self.log.warning` --- easybuild/easyblocks/p/pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py index c817e7068eb..86da42a51a4 100644 --- a/easybuild/easyblocks/p/pytorch.py +++ b/easybuild/easyblocks/p/pytorch.py @@ -300,7 +300,7 @@ def test_step(self): if failed_test_cases: msg.append("Found %d individual tests with failed assertions: %s" % (len(failed_test_cases), ', '.join(failed_test_cases))) - print("\n".join(msg)) + self.log.warning("\n".join(msg)) def get_count_for_pattern(regex, text): """Match the regexp containing a single group and return the integer value of the matched group.