329 changes: 193 additions & 136 deletions easybuild/easyblocks/p/pytorch.py
@@ -30,6 +30,7 @@
 
 import os
 import re
+import sys
 import tempfile
 import easybuild.tools.environment as env
 from easybuild.tools import LooseVersion
@@ -42,6 +43,164 @@
 from easybuild.tools.systemtools import POWER, get_cpu_architecture
 
 
+if sys.version_info >= (3, 9):
+    from typing import NamedTuple
+    FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])])
+    TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])
+    TestResult = NamedTuple('TestResult', [('test_cnt', int),
+                                           ('error_cnt', int),
+                                           ('failure_cnt', int),
+                                           ('failed_suites', list[TestSuiteResult])])
+else:
+    from collections import namedtuple
+    FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail'))
+    TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary'))
+    TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites'))
+
+
+def find_failed_test_names(tests_out):
+    """Find the names of failed test cases in the output of the test step.
+
+    Return sorted lists of names in a FailedTestNames tuple.
+    """
+    # Patterns like:
+    # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) ===
+    # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) ---
+    regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n"
+    failed_test_cases = re.findall(regex, tests_out, re.M)
+    # And patterns like:
+    # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip]
+    # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip]
+    # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip]
+    regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - "
+    failed_test_cases.extend(re.findall(regex, tests_out, re.M))
+    return FailedTestNames(error=sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR'),
+                           fail=sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR'))
+
+
+def parse_test_log(tests_out):
+    """Parse the test output and return the result as a TestResult tuple."""
+
+    def get_count_for_pattern(regex, text):
+        """Match the regex, which must contain a single capture group, and return its integer value.
+        Return zero if there was no match; warn and also return zero if more than one match was found.
+        """
+        match = re.findall(regex, text)
+        if len(match) == 1:
+            return int(match[0])
+        elif len(match) > 1:
+            # Shouldn't happen, but means something went wrong with the regular expressions.
+            # Throw a warning, as the build might be fine; no need to error on this.
+            warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
+            warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
+            print_warning(warn_msg)
+        return 0
+
+    failure_cnt = 0
+    error_cnt = 0
+    failed_suites = []
+
+    # Remove empty lines to make the regexes below simpler
+    tests_out = re.sub(r'^[ \t]*\n', '', tests_out, flags=re.MULTILINE)
+
+    # Grep for patterns like:
+    # Ran 219 tests in 67.325s
+    #
+    # FAILED (errors=10, skipped=190, expected failures=6)
+    # test_fx failed!
+    regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n"
+             r"FAILED \((?P<failure_summary>.*)\)$\n"
+             r"(?:^(?:(?!failed!).)*$\n){0,5}"
+             r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
+        failure_summary = m.group('failure_summary')
+        total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
+        failed_suites.append(
+            TestSuiteResult(test_suite, "{total} total tests, {failure_summary}".format(
+                total=total, failure_summary=failure_summary))
+        )
+        failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
+        error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
+
+    # Grep for patterns like:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
+    # test_quantization failed!
+    # OR:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
+    #
+    # FINISHED PRINTING LOG FILE
+    # test_quantization failed!
+    # OR:
+    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
+    # If in CI, skip info is located in the xml test reports, please either go to s3 or the hud to download them
+    #
+    # FINISHED PRINTING LOG FILE of test_ops_gradients (/tmp/vsc40023/easybuil...)
+    #
+    # test_quantization failed!
+
+    regex = (
+        r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n"
+        r"(?:.*skip info is located in the xml test reports.*\n)?"
+        r"(?:.*FINISHED PRINTING LOG FILE.*\n)?"
+        r"(?P<failed_test_suite_name>.*) failed!$"
+    )
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
+        failure_summary = m.group('failure_summary')
+        test_suite = m.group('failed_test_suite_name')
+        failed_suites.append(TestSuiteResult(test_suite, failure_summary))
+        failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
+        error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
+
+    # Grep for patterns like:
+    # AssertionError: 2 unit test(s) failed:
+    #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail
+    #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail
+    #
+    # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl (<snip>)
+    #
+    # distributed/test_c10d_nccl failed!
+
+    regex = (
+        r"^AssertionError: (?P<failure_summary>[0-9]+ unit test\(s\) failed):\n"
+        r"(\s+.*\n)+"
+        r"(((?!failed!).)*\n){0,5}"
+        r"(?P<failed_test_suite_name>.*) failed!$"
+    )
+
+    for m in re.finditer(regex, tests_out, re.M):
+        # E.g. '2 unit test(s) failed'
+        failure_summary = m.group('failure_summary')
+        test_suite = m.group('failed_test_suite_name')
+        failed_suites.append(TestSuiteResult(test_suite, failure_summary))
+        failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary)
+
+    # Collect the total number of tests
+
+    # Pattern for tests run with unittest, like:
+    # Ran 3 tests in 0.387s
+    regex = r"^Ran (?P<test_cnt>[0-9]+) tests in"
+    test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M))
+    # Pattern for tests run with pytest, like:
+    # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============
+    regex = r"=+ (?P<summary>.*) in \d+.* =+\n"
+    count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [
+        "failed",
+        "passed",
+        "skipped",
+        "deselected",
+        "xfailed",
+        "xpassed",
+    ]]
+    for m in re.finditer(regex, tests_out, re.M):
+        test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)
+
+    return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites)
+
+
 class EB_PyTorch(PythonPackage):
     """Support for building/installing PyTorch."""
 
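A note on the helpers above: the version gate exists because subscripted builtin generics such as list[str] in the NamedTuple annotations require Python 3.9 (PEP 585); older interpreters fall back to plain namedtuples with the same field names. To make the parsing contract concrete, the following minimal sketch is an editor's illustration using a fabricated log fragment; it is not part of this diff, and the test names and counts are invented:

    # Illustration only -- fabricated test-step output, not from a real PyTorch run
    sample = (
        "======================================================================\n"
        "FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps)\n"
        "----------------------------------------------------------------------\n"
        "FAILED test_ops.py::TestCommonCPU::test_out_warning_cpu - RuntimeError: ...\n"
        "Ran 219 tests in 67.325s\n"
        "FAILED (failures=1, errors=2, skipped=190)\n"
        "test_fx failed!\n"
    )
    names = find_failed_test_names(sample)
    # names.fail == ['test_add_scalar_relu', 'test_out_warning_cpu']; names.error == []
    result = parse_test_log(sample)
    # result.test_cnt == 219, result.failure_cnt == 1, result.error_cnt == 2
    # result.failed_suites == [TestSuiteResult(name='test_fx',
    #     summary='219 total tests, failures=1, errors=2, skipped=190')]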
@@ -283,126 +442,21 @@ def test_step(self):
         tests_out, tests_ec = test_result
 
         # Show failed subtests to aid in debugging failures
-        # I.e. patterns like
-        # === FAIL: test_add_scalar_relu (quantization.core.test_quantized_op.TestQuantizedOps) ===
-        # --- ERROR: test_all_to_all_group_cuda (__main__.TestDistBackendWithSpawn) ---
-        regex = r"^[=-]+\n(FAIL|ERROR): (test_.*?)\s\(.*\n[=-]+\n"
-        failed_test_cases = re.findall(regex, tests_out, re.M)
-        # And patterns like:
-        # FAILED test_ops_gradients.py::TestGradientsCPU::test_fn_grad_linalg_det_singular_cpu_complex128 - [snip]
-        # FAILED [22.8699s] test_sparse_csr.py::TestSparseCompressedCPU::test_invalid_input_csr_large_cpu - [snip]
-        # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch - [snip]
-        regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - "
-        failed_test_cases.extend(re.findall(regex, tests_out, re.M))
-        if failed_test_cases:
-            errored_test_cases = sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR')
-            failed_test_cases = sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR')
+        failed_test_names = find_failed_test_names(tests_out)
+        if failed_test_names.error or failed_test_names.fail:
             msg = []
-            if errored_test_cases:
+            if failed_test_names.error:
                 msg.append("Found %d individual tests that exited with an error: %s"
-                           % (len(errored_test_cases), ', '.join(errored_test_cases)))
-            if failed_test_cases:
+                           % (len(failed_test_names.error), ', '.join(failed_test_names.error)))
+            if failed_test_names.fail:
                 msg.append("Found %d individual tests with failed assertions: %s"
-                           % (len(failed_test_cases), ', '.join(failed_test_cases)))
+                           % (len(failed_test_names.fail), ', '.join(failed_test_names.fail)))
             self.log.warning("\n".join(msg))
 
-        def get_count_for_pattern(regex, text):
-            """Match the regexp containing a single group and return the integer value of the matched group.
-            Return zero if no or more than 1 match was found and warn for the latter case
-            """
-            match = re.findall(regex, text)
-            if len(match) == 1:
-                return int(match[0])
-            elif len(match) > 1:
-                # Shouldn't happen, but means something went wrong with the regular expressions.
-                # Throw warning, as the build might be fine, no need to error on this.
-                warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
-                warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
-                print_warning(warn_msg)
-            return 0
-
-        # Create clear summary report
-        failure_report = []
-        failure_cnt = 0
-        error_cnt = 0
-        failed_test_suites = []
-
-        # Grep for patterns like:
-        # Ran 219 tests in 67.325s
-        #
-        # FAILED (errors=10, skipped=190, expected failures=6)
-        # test_fx failed!
-        regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
-                 r"FAILED \((?P<failure_summary>.*)\)$\n"
-                 r"(?:^(?:(?!failed!).)*$\n){0,5}"
-                 r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
-            failure_summary = m.group('failure_summary')
-            total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
-            failure_report.append("{test_suite} ({total} total tests, {failure_summary})".format(
-                test_suite=test_suite, total=total, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
-            error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Grep for patterns like:
-        # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
-        # test_quantization failed!
-        # OR:
-        # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 63.43s (01:03:43) =========
-        # FINISHED PRINTING LOG FILE
-        #
-        # test_quantization failed!
-
-        regex = (
-            r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n"
-            r"(?:.*FINISHED PRINTING LOG FILE.*\n)?"
-            r"(?:^\s*\n)*"
-            r"(?P<failed_test_suite_name>.*) failed!$"
-        )
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
-            failure_summary = m.group('failure_summary')
-            test_suite = m.group('failed_test_suite_name')
-            failure_report.append("{test_suite} ({failure_summary})".format(
-                test_suite=test_suite, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
-            error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Grep for patterns like:
-        # AssertionError: 2 unit test(s) failed:
-        #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_debug_detail
-        #   DistributedDataParallelTest.test_find_unused_parameters_kwarg_grad_is_view_debug_detail
-        #
-        # FINISHED PRINTING LOG FILE of distributed/test_c10d_nccl (<snip>)
-        #
-        # distributed/test_c10d_nccl failed!
-
-        regex = (
-            r"^AssertionError: (?P<failure_summary>[0-9]+ unit test\(s\) failed):\n"
-            r"(\s+.*\n)+"
-            r"(((?!failed!).)*\n){0,5}"
-            r"(?P<failed_test_suite_name>.*) failed!$"
-        )
-
-        for m in re.finditer(regex, tests_out, re.M):
-            # E.g. '2 unit test(s) failed'
-            failure_summary = m.group('failure_summary')
-            test_suite = m.group('failed_test_suite_name')
-            failure_report.append("{test_suite} ({failure_summary})".format(
-                test_suite=test_suite, failure_summary=failure_summary
-            ))
-            failure_cnt += get_count_for_pattern(r"([0-9]+) unit test\(s\) failed", failure_summary)
-            failed_test_suites.append(test_suite)
-
-        # Make the names unique
-        failed_test_suites = set(failed_test_suites)
+        test_result = parse_test_log(tests_out)
+        failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites]
+        failed_test_suites = set(suite.name for suite in test_result.failed_suites)
         # Gather all failed test suites in case we missed any (e.g. when it exited due to syntax errors)
         # Also unique to be able to compare the lists below
         all_failed_test_suites = set(
@@ -419,32 +473,17 @@ def get_count_for_pattern(regex, text):
         failure_report = '\n'.join(failure_report)
 
         # Calculate the number of unsuccessful tests and the total number of tests
-        failed_test_cnt = failure_cnt + error_cnt
-        # Pattern for tests ran with unittest like:
-        # Ran 3 tests in 0.387s
-        regex = r"^Ran (?P<test_cnt>[0-9]+) tests in"
-        test_cnt = sum(int(hit) for hit in re.findall(regex, tests_out, re.M))
-        # Pattern for tests ran with pytest like:
-        # ============ 286 passed, 18 skipped, 2 xfailed in 38.71s ============
-        regex = r"=+ (?P<summary>.*) in \d+.* =+\n"
-        count_patterns = [re.compile(r"([0-9]+) " + reason) for reason in [
-            "failed",
-            "passed",
-            "skipped",
-            "deselected",
-            "xfailed",
-            "xpassed",
-        ]]
-        for m in re.finditer(regex, tests_out, re.M):
-            test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)
+        failed_test_cnt = test_result.failure_cnt + test_result.error_cnt
 
         if failed_test_cnt > 0:
             max_failed_tests = self.cfg['max_failed_tests']
 
-            failure_or_failures = 'failure' if failure_cnt == 1 else 'failures'
-            error_or_errors = 'error' if error_cnt == 1 else 'errors'
+            failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures'
+            error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors'
             msg = "%d test %s, %d test %s (out of %d):\n" % (
-                failure_cnt, failure_or_failures, error_cnt, error_or_errors, test_cnt
+                test_result.failure_cnt, failure_or_failures,
+                test_result.error_cnt, error_or_errors,
+                test_result.test_cnt
             )
             msg += failure_report
 
@@ -470,7 +509,7 @@ def get_count_for_pattern(regex, text):
                 raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
                                      failed_test_cnt, max_failed_tests)
         elif failure_report:
-            raise EasyBuildError("Test command had non-zero exit code (%s)!\n%s", tests_ec, failure_report)
+            raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report)
         elif tests_ec:
             raise EasyBuildError("Test command had non-zero exit code (%s), but no failed tests found?!", tests_ec)
 
@@ -502,3 +541,21 @@ def make_module_req_guess(self):
         # Required to dynamically load libcaffe2_nvrtc.so
         guesses['LD_LIBRARY_PATH'] = [os.path.join(self.pylibdir, 'torch', 'lib')]
         return guesses
+
+
+if __name__ == '__main__':
+    arg = sys.argv[1]
+    if not os.path.isfile(arg):
+        raise RuntimeError('Expected a test result file to parse, got: ' + arg)
+    with open(arg, 'r') as f:
+        content = f.read()
+    m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
+    if m:
+        content = content[m.end():]
+    # Heuristic for the next possible text added by EasyBuild (re.M so '^' matches at any line start)
+    m = re.search(r'^== \d+-\d+-\d+ .* (pytorch\.py|EasyBuild)', content, re.M)
+    if m:
+        content = content[:m.start()]
+
+    print("Failed test names: ", find_failed_test_names(content))
+    print("Test result: ", parse_test_log(content))