From 576edac31fa91a06199d51cc02e167be61c872ad Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 18 Dec 2025 18:03:41 +0100 Subject: [PATCH 01/30] adding easyconfigs: parameterized-0.9.0-GCCcore-14.3.0.eb, pytest-subtests-0.15.0-GCCcore-14.3.0.eb, PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb, unittest-xml-reporting-3.2.0-GCCcore-14.3.0.eb and patches: PyTorch-1.12.1_add-hypothesis-suppression.patch, PyTorch-1.7.0_disable-dev-shm-test.patch, PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch, PyTorch-2.1.0_remove-test-requiring-online-access.patch, PyTorch-2.6.0_show-test-duration.patch, PyTorch-2.6.0_skip-test_segfault.patch, PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch, PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch, PyTorch-2.7.1_skip-test_data_parallel_rnn.patch, PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch, PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch, PyTorch-2.7.1_skip-tests-requiring-SM90.patch, PyTorch-2.7.1_suport-64bit-BARs.patch, PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch, PyTorch-2.9.0_disable-test_nan_assert.patch, PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch, PyTorch-2.9.0_fix-attention-squeeze.patch, PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch, PyTorch-2.9.0_fix-nccl-test-env.patch, PyTorch-2.9.0_fix-test_exclude_padding.patch, PyTorch-2.9.0_fix-test_version_error.patch, PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch, PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch, PyTorch-2.9.0_remove-faulty-close.patch, PyTorch-2.9.0_revert-pybind11-3-change.patch, PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch, PyTorch-2.9.0_skip-test_convolution1-on-H100.patch, PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch, PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch, PyTorch-2.9.0_skip-test_override-without-CUDA.patch, PyTorch-2.9.0_skip-test_unbacked_reduction.patch, PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch, PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch, PyTorch-2.9.1_skip-RingFlexAttentionTest.patch --- ...orch-2.7.0_avoid_caffe2_test_cpp_jit.patch | 14 ++ ...7.1_avoid-caffe2-sandcastle-test-lib.patch | 18 ++ ...ch-2.7.1_skip-test_data_parallel_rnn.patch | 28 +++ ...orch-2.7.1_skip-test_gds_fails_in_ci.patch | 16 ++ ...skip-test_mixed_mm_exhaustive_dtypes.patch | 14 ++ ...orch-2.7.1_skip-tests-requiring-SM90.patch | 34 +++ .../PyTorch-2.7.1_suport-64bit-BARs.patch | 27 +++ ..._tolerance-test_partial_flat_weights.patch | 23 ++ ...yTorch-2.9.0_disable-test_nan_assert.patch | 57 +++++ ...r-in-test_workspace_allocation_error.patch | 28 +++ ...U-tests-in-test_torchinductor_opinfo.patch | 28 +++ .../PyTorch-2.9.0_fix-attention-squeeze.patch | 59 +++++ .../PyTorch-2.9.0_fix-nccl-test-env.patch | 55 +++++ ...Torch-2.9.0_fix-test_exclude_padding.patch | 33 +++ ...PyTorch-2.9.0_fix-test_version_error.patch | 27 +++ .../PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch | 29 +++ ...rease-tolerance-in-test_transformers.patch | 21 ++ .../PyTorch-2.9.0_remove-faulty-close.patch | 48 ++++ ...Torch-2.9.0_revert-pybind11-3-change.patch | 68 ++++++ ...ip-test_benchmark_on_non_zero_device.patch | 23 ++ ...2.9.0_skip-test_convolution1-on-H100.patch | 30 +++ ...tor_all_gather_into_tensor_coalesced.patch | 19 ++ ...-test_original_aten_preserved_pad_mm.patch | 19 ++ ....9.0_skip-test_override-without-CUDA.patch | 35 +++ ...h-2.9.0_skip-test_unbacked_reduction.patch | 18 ++ ...2.9.0_skip-tests-requiring-CUDA-12.8.patch | 122 ++++++++++ ...expected-success-in-test_fake_export.patch | 104 +++++++++ .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 220 ++++++++++++++++++ ...rch-2.9.1_skip-RingFlexAttentionTest.patch | 23 ++ .../parameterized-0.9.0-GCCcore-14.3.0.eb | 18 ++ .../pytest-subtests-0.15.0-GCCcore-14.3.0.eb | 22 ++ ...test-xml-reporting-3.2.0-GCCcore-14.3.0.eb | 23 ++ 32 files changed, 1303 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_data_parallel_rnn.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_suport-64bit-BARs.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_disable-test_nan_assert.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-attention-squeeze.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-nccl-test-env.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_exclude_padding.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_version_error.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_remove-faulty-close.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_revert-pybind11-3-change.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_convolution1-on-H100.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_override-without-CUDA.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_unbacked_reduction.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-RingFlexAttentionTest.patch create mode 100644 easybuild/easyconfigs/p/parameterized/parameterized-0.9.0-GCCcore-14.3.0.eb create mode 100644 easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.15.0-GCCcore-14.3.0.eb create mode 100644 easybuild/easyconfigs/u/unittest-xml-reporting/unittest-xml-reporting-3.2.0-GCCcore-14.3.0.eb diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch new file mode 100644 index 000000000000..f07706b8d371 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch @@ -0,0 +1,14 @@ +Avoid tripping on //caffe2/test/cpp/jit:test_custom_class_registrations with IS_SANDCASTLE + +Author: Alexander Grund (TU Dresden) +--- a/torch/testing/_internal/torchbind_impls.py ++++ b/torch/testing/_internal/torchbind_impls.py +@@ -116,8 +116,6 @@ def load_torchbind_test_lib(): + + if IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") +- elif IS_SANDCASTLE or IS_FBCODE: +- lib_file_path = Path("//caffe2/test/cpp/jit:test_custom_class_registrations") + elif IS_WINDOWS: + lib_file_path = find_library_location("torchbind_test.dll") + else: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch new file mode 100644 index 000000000000..bb3103160a73 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch @@ -0,0 +1,18 @@ +"//caffe2/test/inductor:custom_ops" is a FB-specific "library" which we pull in by setting IS_SANDCASTLE causing +> OSError: /caffe2/test/inductor:custom_ops: cannot open shared object file: No such file or directory +in inductor/test_aot_inductor_custom_ops.py + +Author: Alexander Grund (TU Dresden) +diff --git a/test/inductor/test_aot_inductor_custom_ops.py b/test/inductor/test_aot_inductor_custom_ops.py +index ce2ef3739d3..7b9dc4792fd 100644 +--- a/test/inductor/test_aot_inductor_custom_ops.py ++++ b/test/inductor/test_aot_inductor_custom_ops.py +@@ -380,7 +380,7 @@ common_utils.instantiate_parametrized_tests(AOTInductorTestsTemplate) + + class AOTICustomOpTestCase(TestCase): + def setUp(self): +- if IS_SANDCASTLE or IS_FBCODE: ++ if False: + torch.ops.load_library("//caffe2/test/inductor:custom_ops") + elif IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_data_parallel_rnn.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_data_parallel_rnn.patch new file mode 100644 index 000000000000..5b81095e9317 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_data_parallel_rnn.patch @@ -0,0 +1,28 @@ +Failing upstream too: https://github.com/pytorch/pytorch/issues/162745 +> /PyTorch/2.7.1/foss-2024a-CUDA-12.6.0/pytorch-v2.7.1/test/distributed/test_data_parallel.py", line 99, in test_data_parallel_rnn +> self.assertTrue(p1.allclose(p2)) +> AssertionError: False is not true + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/test_data_parallel.py b/test/distributed/test_data_parallel.py +index 26f64df90d9..c25cc6673c3 100644 +--- a/test/distributed/test_data_parallel.py ++++ b/test/distributed/test_data_parallel.py +@@ -6,6 +6,7 @@ import io + from collections import OrderedDict + from copy import deepcopy + from itertools import product ++import unittest + + import torch + import torch.nn.functional as F +@@ -63,7 +64,7 @@ class TestDataParallel(TestCase): + + gradcheck(fn, (m.t_rg,)) + +- @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "multi-GPU not supported") ++ @unittest.skip("Fails") + def test_data_parallel_rnn(self): + class TestModule(torch.nn.Module): + def __init__(self) -> None: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch new file mode 100644 index 000000000000..bb10b1044562 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch @@ -0,0 +1,16 @@ +Skip a test meant for CI only. + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/test_cuda.py b/test/test_cuda.py +index 3726c377970..78b5e8c8af9 100644 +--- a/test/test_cuda.py ++++ b/test/test_cuda.py +@@ -3633,6 +3633,7 @@ print(f"{{r1}}, {{r2}}") + x = torch.cuda.device_count() + self.assertEqual(f"{x}, 1", r) + ++ @unittest.skip("Not applicable") + def test_gds_fails_in_ci(self): + if IS_WINDOWS or TEST_WITH_ROCM: + error_msg = "is not supported on this platform" diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch new file mode 100644 index 000000000000..e745a7282085 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch @@ -0,0 +1,14 @@ +Test fails upstream too, see https://github.com/pytorch/pytorch/issues/147853 +> RuntimeError: Expected to find ".to(" but did not find it + +Author: Alexander Grund (TU Dresden) +--- a/test/inductor/test_pattern_matcher.py ++++ b/test/inductor/test_pattern_matcher.py +@@ -389,6 +389,7 @@ class TestPatternMatcher(TestCase): + } + ) + @unittest.skipIf(not IS_BIG_GPU, "templates require big gpu") ++ @unittest.skip("Fails") + def test_mixed_mm_exhaustive_dtypes(self): + def fn(a, b): + return torch.mm(a, b.to(a.dtype)) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch new file mode 100644 index 000000000000..ee60c76ddbcf --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch @@ -0,0 +1,34 @@ +Avoid it failing on e.g. A100: + +> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] RuntimeError: CUDA error: device-side assert triggered... +> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] exiting process 1 with exit code: 10 +> ... +> :318: st_vec: block: [0,0,0], thread: [87,0,0] Assertion `false` failed. +> /pytorch-v2.7.1/torch/csrc/distributed/c10d/CUDASymmetricMemory-inl.h:318: st_vec: block: [0,0,0], thread: [88,0,0] Assertion `false` failed. + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py +index 7410255d27a..603ea0b375b 100644 +--- a/test/distributed/test_c10d_nccl.py ++++ b/test/distributed/test_c10d_nccl.py +@@ -3367,7 +3367,7 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): + @skip_if_rocm_multiprocess + def test_intra_node_comm_all_reduce(self): + from torch._C._distributed_c10d import _get_intra_node_comm_usage_counter +- from torch.testing._internal.common_cuda import SM80OrLater ++ from torch.testing._internal.common_cuda import SM90OrLater + + for peer in range(self.world_size): + if peer == self.rank: +@@ -3375,8 +3375,8 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): + if not torch._C._cuda_canDeviceAccessPeer(self.rank, peer): + raise SkipTest("Test requires p2p access") + +- if not SM80OrLater: +- raise SkipTest("Test requires sm>=80") ++ if not SM90OrLater: ++ raise SkipTest("Test requires sm>=90") + + store = c10d.FileStore(self.file_name, self.world_size) + os.environ["ENABLE_INTRA_NODE_COMM"] = "1" diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_suport-64bit-BARs.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_suport-64bit-BARs.patch new file mode 100644 index 000000000000..6e8cdfb2d36a --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_suport-64bit-BARs.patch @@ -0,0 +1,27 @@ +When the GPUs use 64bit BARs the RPC module fails during the initialization with: +> E RuntimeError: In getBar1SizeOfGpu at tensorpipe/channel/cuda_gdr/context_impl.cc:242 "": No such file or directory + +This causes KeyboardInterrupt errors in distributed/rpc/test_share_memory + +See https://github.com/pytorch/pytorch/issues/159354 + +Author: Alexander Grund (TU Dresden) + +diff --git a/third_party/tensorpipe/tensorpipe/channel/cuda_gdr/context_impl.cc b/third_party/tensorpipe/tensorpipe/channel/cuda_gdr/context_impl.cc +index 182a04a..b26751e 100644 +--- a/third_party/tensorpipe/tensorpipe/channel/cuda_gdr/context_impl.cc ++++ b/third_party/tensorpipe/tensorpipe/channel/cuda_gdr/context_impl.cc +@@ -239,6 +239,13 @@ size_t getBar1SizeOfGpu(int gpuIdx) { + + struct stat bar1Stats; + int rv = ::stat(pciPath.c_str(), &bar1Stats); ++ if (rv < 0 && errno == ENOENT) { ++ // Some GPUs use 64 bit BARs using 2 slots each, ++ // so the BAR 0 spans slots 0 & 1 and BAR 1 is at slots 2 & 3 ++ TP_VLOG(5) << "GPU #" << gpuIdx << " might has 64 bit BARs"; ++ pciPath[pciPath.size() - 1] = '2'; ++ rv = ::stat(pciPath.c_str(), &bar1Stats); ++ } + TP_THROW_SYSTEM_IF(rv < 0, errno); + + return bar1Stats.st_size; diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch new file mode 100644 index 000000000000..c58d35aacafd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch @@ -0,0 +1,23 @@ +Avoid failures in test_nn.py test_partial_flat_weights + +> Mismatched elements: 9 / 36 (25.0%) +> Greatest absolute difference: 3.013014793395996e-05 at index (2, 0, 4) (up to 1e-05 allowed) +> Greatest relative difference: 0.0030790010932832956 at index (2, 0, 4) (up to 1.3e-06 allowed) + +See https://github.com/pytorch/pytorch/issues/163072 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/test_nn.py b/test/test_nn.py +index 30609247cb1..02a2d3a7f3a 100644 +--- a/test/test_nn.py ++++ b/test/test_nn.py +@@ -4299,7 +4299,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""") + inp = inp.cuda() + # otherwise, subsequent warnings will be hidden, and further tests rely on them + warnings.simplefilter("always") +- self.assertEqual(m(inp)[0].cpu(), out_expected[0]) ++ self.assertEqual(m(inp)[0].cpu(), out_expected[0], atol=3.1e-5, rtol=3.1e-3) + + @unittest.skipIf(not TEST_CUDNN, "needs cudnn") + @set_default_dtype(torch.double) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_disable-test_nan_assert.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_disable-test_nan_assert.patch new file mode 100644 index 000000000000..0f60a483e5aa --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_disable-test_nan_assert.patch @@ -0,0 +1,57 @@ +Disable a test that has incomplete skip condition. +See https://github.com/pytorch/pytorch/pull/167971 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py +index 0a0f3ee4ca2..aff8ba0156f 100644 +--- a/test/distributed/test_c10d_nccl.py ++++ b/test/distributed/test_c10d_nccl.py +@@ -11,6 +11,7 @@ import sys + import tempfile + import threading + import time ++import unittest + import warnings + from contextlib import contextmanager + from datetime import datetime, timedelta +@@ -295,12 +296,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): + # But if we are in Sandcastle, `skip_but_pass_in_sandcastle` would return 0. + TEST_NAN_ASSERT_RETURN = 0 if IS_SANDCASTLE else signal.SIGABRT + self.special_return_code_checks = { +- self.test_nan_assert_float16.__wrapped__: TEST_NAN_ASSERT_RETURN, +- self.test_nan_assert_float32.__wrapped__: TEST_NAN_ASSERT_RETURN, +- self.test_nan_assert_float64.__wrapped__: TEST_NAN_ASSERT_RETURN, +- self.test_nan_assert_bfloat16.__wrapped__: TEST_NAN_ASSERT_RETURN, +- self.test_nan_assert_float8_e4m3fn.__wrapped__: TEST_NAN_ASSERT_RETURN, +- self.test_nan_assert_float8_e5m2.__wrapped__: TEST_NAN_ASSERT_RETURN, ++ + } + + # TORCH_NCCL_BLOCKING_WAIT overrides TORCH_NCCL_ASYNC_ERROR_HANDLING hence tests +@@ -489,24 +485,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): + torch.version.cuda is not None and int(torch.version.cuda.split(".")[0]) >= 12 + ) + +- @requires_nccl() +- @skip_but_pass_in_sandcastle_if( +- # skip for cu126 as well due to https://github.com/pytorch/pytorch/issues/153479 +- not (TEST_MULTIGPU and CUDA_12_AND_ABOVE), +- "NCCL test requires 2+ GPUs and Device side assert could cause unexpected errors in lower versions of CUDA", +- ) +- @parametrize( +- "type", +- [ +- torch.float16, +- torch.float32, +- torch.float64, +- torch.bfloat16, +- torch.float8_e4m3fn, +- torch.float8_e5m2, +- ], +- ) +- @skip_if_rocm_multiprocess ++ @unittest.skip("Wrong conditions") + def test_nan_assert(self, type): + # Expecting a device-side error when NaN is detected + os.environ["TORCH_NCCL_NAN_CHECK"] = "1" diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch new file mode 100644 index 000000000000..5c35b586ac8b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch @@ -0,0 +1,28 @@ +CudaGraphTreeTests.test_workspace_allocation_error fails if TORCH_DISABLE_ADDR2LINE=1 is set +> File "/pytorch-v2.9.0/test/inductor/test_cudagraph_trees.py", line 1568, in test_workspace_allocation_error +> self.assertTrue( +> AssertionError: False is not true + +See https://github.com/pytorch/pytorch/issues/103369 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_cudagraph_trees.py b/test/inductor/test_cudagraph_trees.py +--- a/test/inductor/test_cudagraph_trees.py ++++ b/test/inductor/test_cudagraph_trees.py +@@ -5,6 +5,7 @@ import functools + import gc + import importlib + import itertools ++import os + import re + import sys + import unittest +@@ -1543,6 +1544,7 @@ if HAS_CUDA_AND_TRITON: + @skipIfRocm + @unittest.skipUnless(IS_X86 and IS_LINUX, "cpp contexts are linux only") + @torch._inductor.config.patch("triton.cudagraph_trees_history_recording", True) ++ @unittest.mock.patch.dict(os.environ, {"TORCH_DISABLE_ADDR2LINE": "0"}) + def test_workspace_allocation_error(self): + torch._C._cuda_clearCublasWorkspaces() + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch new file mode 100644 index 000000000000..0bf2d29a7459 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch @@ -0,0 +1,28 @@ +Many tests using Float16 on CPU fail with reference_in_float=False +See https://github.com/pytorch/pytorch/issues/169809 + +E.g.: +> TestInductorOpInfoCPU.test_comprehensive_grid_sampler_2d_cpu_float16 +> [...] +> Mismatched elements: 125 / 780 (16.0%) +> Greatest absolute difference: 0.02001953125 at index (0, 1, 3, 2) (up to 1e-05 allowed) +> Greatest relative difference: 2.34375 at index (1, 1, 2, 4) (up to 0.001 allowed) + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_torchinductor_opinfo.py b/test/inductor/test_torchinductor_opinfo.py +index 807ccb48a79..7e5740e0177 100644 +--- a/test/inductor/test_torchinductor_opinfo.py ++++ b/test/inductor/test_torchinductor_opinfo.py +@@ -1329,8 +1329,10 @@ class TestInductorOpInfo(TestCase): + # Triton + if has_triton(): + adjusted_kwargs.update( +- copy_to_gpu=False, reference_in_float=False ++ copy_to_gpu=False, + ) ++ if device_type == GPU_TYPE: ++ adjusted_kwargs['reference_in_float'] = False + + # skip checking gradient on CPU for now + if device_type == GPU_TYPE: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-attention-squeeze.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-attention-squeeze.patch new file mode 100644 index 000000000000..851ac1f34bd5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-attention-squeeze.patch @@ -0,0 +1,59 @@ +From d55c9d52cda889850484968fc55ee73bf40540ec Mon Sep 17 00:00:00 2001 +From: Chien-Chin Huang +Date: Wed, 17 Sep 2025 18:14:51 -0700 +Subject: [PATCH] [CP] Fix cuDNN CP LSE dimension bug (#163231) + +We should only unsqueeze if necessary. + +Fix https://github.com/pytorch/pytorch/issues/162743 + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/163231 +Approved by: https://github.com/eqy +ghstack dependencies: #162539, #162540, #162541, #163115, #163131 +--- + .../tensor/experimental/_attention.py | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/torch/distributed/tensor/experimental/_attention.py b/torch/distributed/tensor/experimental/_attention.py +index 6336967582429..a3345f37a170d 100644 +--- a/torch/distributed/tensor/experimental/_attention.py ++++ b/torch/distributed/tensor/experimental/_attention.py +@@ -134,6 +134,7 @@ def __init__(self, convert_to_f32: bool, seq_dim: int): + self._seq_dim = seq_dim + self._out: Optional[torch.Tensor] = None + self._lse: Optional[torch.Tensor] = None ++ self._should_lse_squeeze = False + self._convert_to_f32 = convert_to_f32 + self._out_dtype = torch.float32 + self._lse_dtype = torch.float32 +@@ -141,7 +142,14 @@ def __init__(self, convert_to_f32: bool, seq_dim: int): + def _merge_one( + self, block_out: torch.Tensor, block_lse: torch.Tensor, partial: bool + ) -> None: +- block_lse = block_lse.unsqueeze(dim=-1) ++ # The cuDNN backend preserves the last dimension for LSE. ++ # Apply unsqueeze only if the input does not already have ++ # the required dimensionality. ++ if len(block_lse.shape) < len(block_out.shape): ++ block_lse = block_lse.unsqueeze(dim=-1) ++ self._should_lse_squeeze = True ++ assert len(block_lse.shape) == len(block_out.shape) ++ + if self._lse is None: + self._lse = block_lse + self._out = block_out +@@ -199,8 +207,12 @@ def step(self, out: torch.Tensor, lse: torch.Tensor, partial: bool) -> None: + def results(self) -> tuple[torch.Tensor, torch.Tensor]: + assert self._out is not None + assert self._lse is not None +- out, lse = self._out, self._lse.squeeze(-1) +- return out.to(self._out_dtype), lse.to(self._lse_dtype) ++ out = self._out.to(self._out_dtype) ++ if self._should_lse_squeeze: ++ lse = self._lse.squeeze(-1).to(self._lse_dtype) ++ else: ++ lse = self._lse.to(self._lse_dtype) ++ return out, lse + + + class _AttentionOp(Protocol): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-nccl-test-env.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-nccl-test-env.patch new file mode 100644 index 000000000000..248d6d934b7b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-nccl-test-env.patch @@ -0,0 +1,55 @@ +From 6702f545d880fd82700811e4a3508cdd76da9a69 Mon Sep 17 00:00:00 2001 +From: Alexander Grund +Date: Tue, 16 Sep 2025 17:37:06 +0000 +Subject: [PATCH] Restore environment after NcclUserBufferRegistrationTest + (#163063) + +This test sets "NCCL_ALGO=NVLS" in NcclUserBufferRegistrationTest which affects tests run in the same process such as `test_on_completion_hook_*` that fail with +> invalid usage (run with NCCL_DEBUG=WARN for details), NCCL version 2.26.2 +> ncclInvalidUsage: This usually reflects invalid usage of NCCL library. +> Last error: +> Error : no algorithm/protocol available for function Broadcast with datatype ncclInt8. NCCL_ALGO was set to NVLS. + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/163063 +Approved by: https://github.com/ezyang +--- + test/distributed/test_c10d_nccl.py | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py +index 0d55845228da..f44394e3148c 100644 +--- a/test/distributed/test_c10d_nccl.py ++++ b/test/distributed/test_c10d_nccl.py +@@ -3145,19 +3145,24 @@ def test_invalid_nccl_blocking_wait_env(self): + class NcclUserBufferRegistrationTest(MultiProcessTestCase): + def setUp(self): + super().setUp() +- # TORCH_NCCL_BLOCKING_WAIT overrides TORCH_NCCL_ASYNC_ERROR_HANDLING hence tests +- # that use TORCH_NCCL_BLOCKING_WAIT will test it as expected. +- os.environ["TORCH_NCCL_ASYNC_ERROR_HANDLING"] = "1" + nccl_debug_file = tempfile.NamedTemporaryFile() +- os.environ["NCCL_ALGO"] = "NVLS" +- os.environ["NCCL_DEBUG"] = "INFO" +- os.environ["NCCL_DEBUG_SUBSYS"] = "NVLS" ++ nccl_env = { ++ # TORCH_NCCL_BLOCKING_WAIT overrides TORCH_NCCL_ASYNC_ERROR_HANDLING hence tests ++ # that use TORCH_NCCL_BLOCKING_WAIT will test it as expected. ++ "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1", ++ "NCCL_ALGO": "NVLS", ++ "NCCL_DEBUG": "INFO", ++ "NCCL_DEBUG_SUBSYS": "NVLS", ++ "NCCL_DEBUG_FILE": nccl_debug_file.name, ++ } + if torch.cuda.nccl.version() >= (2, 24, 3): +- os.environ["NCCL_DEBUG_SUBSYS"] = "REG,TUNING" +- os.environ["NCCL_DEBUG_FILE"] = nccl_debug_file.name ++ nccl_env["NCCL_DEBUG_SUBSYS"] = "REG,TUNING" ++ self.env_patcher = mock.patch.dict(os.environ, nccl_env) ++ self.env_patcher.start() + self._spawn_processes() + + def tearDown(self): ++ self.env_patcher.stop() + super().tearDown() + try: + os.remove(self.file_name) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_exclude_padding.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_exclude_padding.patch new file mode 100644 index 000000000000..b74d565bc51f --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_exclude_padding.patch @@ -0,0 +1,33 @@ +PadMMTest.test_exclude_padding fails on H100 with +> self.assertTrue(len(local_cache) == 2) +> AssertionError: False is not true + +Increasing the size triggers the intended code. +See https://github.com/pytorch/pytorch/pull/169177 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_pad_mm.py b/test/inductor/test_pad_mm.py +--- a/test/inductor/test_pad_mm.py ++++ b/test/inductor/test_pad_mm.py +@@ -425,7 +426,10 @@ class PadMMTest(TestCase): + def mm(a, b): + return a @ b + +- mm(torch.rand([25, 25], device="cuda"), torch.rand([25, 25], device="cuda")) ++ # Size must be big enough such that `is_mm_compute_bound` returns True and we need padding to 4 elements ++ # machine balance is ~8.3 (A100), 14.1 (H100), size must be 3x that, see arithmetic_intensity for M=N=K ++ size = [59, 59] ++ mm(torch.rand(size, device="cuda"), torch.rand(size, device="cuda")) + local_cache = get_pad_cache().get_local_cache() + self.assertTrue(len(local_cache) == 2) + FileCheck().check_count("exclude_pad:False", 2, exactly=True).run( +@@ -436,7 +440,7 @@ class PadMMTest(TestCase): + def mm(a, b): + return (a + 1) @ b + +- mm(torch.rand([25, 25], device="cuda"), torch.rand([25, 25], device="cuda")) ++ mm(torch.rand(size, device="cuda"), torch.rand(size, device="cuda")) + local_cache = get_pad_cache().get_local_cache() + # reuse original base timing + self.assertTrue(len(local_cache) == 3) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_version_error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_version_error.patch new file mode 100644 index 000000000000..819b85773563 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_fix-test_version_error.patch @@ -0,0 +1,27 @@ +TestSaveLoad.test_version_error causes a failure due to TEMPDIR being set by EasyBuild: + +> Ran into the following error when deserializing: [enforce fail at inline_container.cc:332] . file in archive is not in a subdirectory tmpi40i4vmn/: easybuild-tmp/archive_version + +Fix the code to handle that, see https://github.com/pytorch/pytorch/pull/169936 + +diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py +index faef9b455a0..e3a463014fb 100644 +--- a/test/export/test_serialize.py ++++ b/test/export/test_serialize.py +@@ -7,6 +7,7 @@ with test_sym_bool) + import copy + import io + import math ++import os + import tempfile + import unittest + import zipfile +@@ -1915,7 +1916,7 @@ class TestSaveLoad(TestCase): + with tempfile.NamedTemporaryFile(suffix=".pt2") as f: + save(ep, f.name) + f.seek(0) +- file_prefix = f.name.split("/")[2].split(".")[0] ++ file_prefix = os.path.splitext(os.path.basename(f.name))[0] + + # Create a new file and copy things over, but modify the + # archive version diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch new file mode 100644 index 000000000000..e2a096dd8b94 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch @@ -0,0 +1,29 @@ +Avoid PyTorch trying to use $HOME if XDG_CACHE_HOME is set. +See https://github.com/pytorch/pytorch/pull/168232 + +Author: Alexander Grund (TU Dresden) + +diff --git a/torch/csrc/distributed/c10d/FlightRecorder.cpp b/torch/csrc/distributed/c10d/FlightRecorder.cpp +--- a/torch/csrc/distributed/c10d/FlightRecorder.cpp ++++ b/torch/csrc/distributed/c10d/FlightRecorder.cpp +@@ -36,8 +36,18 @@ DebugInfoWriter& DebugInfoWriter::getWriter(int rank) { + if (writer_ == nullptr) { + // Attempt to write to running user's HOME directory cache folder - if it + // exists. +- auto homeDir = getCvarString({"HOME"}, "/tmp"); +- auto cacheDirPath = std::filesystem::path(homeDir + "/.cache/torch"); ++ #ifdef _WIN32 ++ const char* cacheHome = nullptr; ++ #else ++ // Uses XDG_CACHE_HOME if it's set ++ const char* cacheHome = std::getenv("XDG_CACHE_HOME"); ++ #endif ++ std::string cacheRoot; ++ if (cacheHome) ++ cacheRoot = cacheHome; ++ else ++ cacheRoot = getCvarString({"HOME"}, "/tmp") + "/.cache"; ++ auto cacheDirPath = std::filesystem::path(cacheRoot + "/torch"); + // Create the .cache directory if it doesn't exist + std::filesystem::create_directories(cacheDirPath); + auto defaultLocation = cacheDirPath / "comm_lib_trace_rank_"; diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch new file mode 100644 index 000000000000..76180cb44818 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch @@ -0,0 +1,21 @@ +When not using Intel MKL this shows a tolerance error in +TestSDPACpuOnlyCPU.test_scaled_dot_product_fused_attention_mask_vs_math_cpu_fused_kernel0_float32_batch_size_12_q_seq_len_1030_kv_seq_len_17_n_head_1_head_dim_8_mask_dim_2_bool_mask_True_train_True_casual_False_set_attn_mask_True_cpu_float32 + +> self.assertEqual(grad_k_actual, grad_k_ref, atol=tol_grad.atol, rtol=tol_grad.rtol) +> Mismatched elements: 1 / 1632 (0.1%) +> Greatest absolute difference: 1.245737075805664e-05 at index (9, 0, 15, 1) (up to 1e-05 allowed) +> Greatest relative difference: 5.157565828994848e-05 at index (9, 0, 15, 1) (up to 5e-06 allowed) + +diff --git a/test/test_transformers.py b/test/test_transformers.py +index 5b240e1f046..2e1b4091d35 100644 +--- a/test/test_transformers.py ++++ b/test/test_transformers.py +@@ -2153,6 +2153,8 @@ class TestSDPACpuOnly(NNTestCase): + tol_grad = Tolerances(5e-2, 5e-2) + if dtype is torch.float16: + tol_grad = Tolerances(1e-1, 1e-1) ++ if dtype is torch.float32: ++ tol_grad = Tolerances(1.3e-5, 5.2e-5) + for mask_shape in itertools.product( + [q_seq_len, 1], [kv_seq_len, 1] + ) if mask_dim == 2 else itertools.product( diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_remove-faulty-close.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_remove-faulty-close.patch new file mode 100644 index 000000000000..0eeea901157c --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_remove-faulty-close.patch @@ -0,0 +1,48 @@ +commit d3d62ad44284abff4fcd0c70e245739c976bf5e1 +Author: Alexander Grund +Date: Tue Nov 25 13:54:26 2025 +0100 + + Avoid closing random file handles in Inductor + + `CppCodeCache.load` returns a `ctypes.CDLL`. + That does not have a (Python class) `close` function so calling + `self.DLL.close()` calls whatever C function with name `close` happens + to exist. This is usually the glibc `close` that closes (file) handles. + As the argument is missing it closes whatever happens to be in the + register at that point. + + In some tests this seems to close "fd=1", i.e. stdout. Sebsequent + writes/print then fails with + > OSError: [Errno 9] Bad file descriptor + + Simply remove the `close` call for now. + +diff --git a/torch/_inductor/autotune_process.py b/torch/_inductor/autotune_process.py +index 1d1687141fb..66b741fafe2 100644 +--- a/torch/_inductor/autotune_process.py ++++ b/torch/_inductor/autotune_process.py +@@ -882,14 +882,6 @@ class CppBenchmarkRequest(CPUDeviceBenchmarkMixin, BenchmarkRequest): + *self.extra_args, + ) + +- def cleanup_run_fn(self) -> None: +- if self.DLL is not None: +- """ +- Check close attr due to it crash on Windows. +- """ +- if hasattr(self.DLL, "close"): +- self.DLL.close() +- + def __str__(self) -> str: + return f"{self.kernel_name=}" + +@@ -939,9 +931,6 @@ class CuteDSLBenchmarkRequest(GPUDeviceBenchmarkMixin, BenchmarkRequest): + + return run_kernel + +- def cleanup_run_fn(self) -> None: +- """Clean up any resources used by the kernel.""" +- + + @functools.cache + def get_tuning_process_pool() -> TuningProcessPool: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_revert-pybind11-3-change.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_revert-pybind11-3-change.patch new file mode 100644 index 000000000000..1b831f45fa58 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_revert-pybind11-3-change.patch @@ -0,0 +1,68 @@ +Revert https://github.com/pytorch/pytorch/pull/161063 + +The PR introduced changes required for the pybind11 3.x API which makes it incompatible with pybind11 2.x + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_cpu_cpp_wrapper.py b/test/inductor/test_cpu_cpp_wrapper.py +index 47a8f3aa063..4b4daaef5c4 100644 +--- a/test/inductor/test_cpu_cpp_wrapper.py ++++ b/test/inductor/test_cpu_cpp_wrapper.py +@@ -268,7 +268,7 @@ if RUN_CPU: + "test_multi_threading", + condition=not IS_WINDOWS, + # Two threads compile, so we expect the output code to be printed twice. +- code_string_count={"py::gil_scoped_release_simple release;": 2}, ++ code_string_count={"py::gil_scoped_release release;": 2}, + ), + BaseTest("test_profiler_mark_wrapper_call"), + BaseTest( +diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu.py b/torch/_inductor/codegen/cpp_wrapper_cpu.py +index 83d1d061467..77f9c368ed3 100644 +--- a/torch/_inductor/codegen/cpp_wrapper_cpu.py ++++ b/torch/_inductor/codegen/cpp_wrapper_cpu.py +@@ -585,7 +585,7 @@ class CppWrapperCpu(PythonWrapperCodegen): + # Weights are promoted in the JIT mode + num_args = len(V.graph.graph_inputs) + len(V.graph.constants) + # release GIL to support multiple instances inference (in different threads of the same process) +- self.prefix.splice("py::gil_scoped_release_simple release;") ++ self.prefix.splice("py::gil_scoped_release release;") + + self.prefix.splice( + f""" +@@ -2310,7 +2310,7 @@ class CppWrapperCpu(PythonWrapperCodegen): + + scoped_lines.writeline("{") + with scoped_lines.indent(): +- scoped_lines.writeline("py::gil_scoped_acquire_simple acquire;") ++ scoped_lines.writeline("py::gil_scoped_acquire acquire;") + scoped_lines.writelines(lines_in_scope.split("\n")) + scoped_lines.writelines("}") + return scoped_lines._lines +diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py +index 63c5bc2debe..fd145ece606 100644 +--- a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py ++++ b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py +@@ -297,7 +297,7 @@ class CppWrapperCpuArrayRef(CppWrapperCpu): + # Weights are promoted in the JIT mode + num_args = len(V.graph.graph_inputs) + len(V.graph.constants) + # release GIL to support multiple instances inference (in different threads of the same process) +- self.prefix.splice("py::gil_scoped_release_simple release;") ++ self.prefix.splice("py::gil_scoped_release release;") + + self.prefix.splice( + f""" +diff --git a/torch/csrc/inductor/cpp_wrapper/common.h b/torch/csrc/inductor/cpp_wrapper/common.h +index a2eebfcc860..9d9ae16462c 100644 +--- a/torch/csrc/inductor/cpp_wrapper/common.h ++++ b/torch/csrc/inductor/cpp_wrapper/common.h +@@ -6,7 +6,8 @@ + #include + + #include +-#include ++#define PYBIND11_SIMPLE_GIL_MANAGEMENT ++#include + + // Include some often-used cpp_wrapper headers, for precompiling. + #include diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch new file mode 100644 index 000000000000..b0a55ad49125 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch @@ -0,0 +1,23 @@ +inductor/test_benchmark_fusion.py BenchmarkingTest.test_benchmark_on_non_zero_device fails with +> self.assertTrue(hit_count > 0) +> AssertionError: False is not true + +Related: https://github.com/pytorch/pytorch/issues/160514 + +Author: Alexander Grund (TU Dresden) + +--- a/test/inductor/test_benchmark_fusion.py ++++ b/test/inductor/test_benchmark_fusion.py +@@ -206,10 +206,7 @@ if HAS_CUDA_AND_TRITON: + copy_tests(BenchmarkFusionTestTemplate, BenchmarkFusionCudaTest, "cuda") + + class BenchmarkingTest(TestCase): +- @unittest.skipIf( +- torch.cuda.device_count() < 2, "The test need at least 2 devices" +- ) +- @skip_if_cpp_wrapper("This tests triton scheduling directly") ++ @unittest.skip("Mocking fails") + def test_benchmark_on_non_zero_device(self): + hit_count = 0 + with torch.cuda.device("cuda:0"): + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_convolution1-on-H100.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_convolution1-on-H100.patch new file mode 100644 index 000000000000..e0c0a45b3415 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_convolution1-on-H100.patch @@ -0,0 +1,30 @@ +test_select_algorithm.py TestSelectAlgorithm.test_convolution1 fails on H100 with: + +> Mismatched elements: 19584 / 23120 (84.7%) +> Greatest absolute difference: 132.32015991210938 at index (0, 22, 4, 13) (up to 0.0001 allowed) +> Greatest relative difference: inf at index (0, 0, 1, 0) (up to 0.0001 allowed) + +See https://github.com/pytorch/pytorch/issues/143412 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_select_algorithm.py b/test/inductor/test_select_algorithm.py +index b30cdc2d946..25d3c068133 100644 +--- a/test/inductor/test_select_algorithm.py ++++ b/test/inductor/test_select_algorithm.py +@@ -27,6 +27,7 @@ from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm, skipIfXpu + from torch.testing._internal.inductor_utils import ( + GPU_TYPE, + HAS_GPU, ++ IS_H100, + requires_gpu, + requires_triton, + ) +@@ -295,6 +296,7 @@ class TestSelectAlgorithm(TestCase): + foo(torch.randn(64, 64, device=GPU_TYPE)) + self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1) + ++ @unittest.skipIf(IS_H100, "Fails on H100, see #143412") + @expectedFailureDynamicWrapper + @patches + def test_convolution1(self): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch new file mode 100644 index 000000000000..fe992ece4f59 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch @@ -0,0 +1,19 @@ +The test fails with +> RuntimeError: Expected to find "buf0 = torch.ops._c10d_functional.all_gather_into_tensor_coalesced.default([arg3_1, arg2_1, arg1_1, arg0_1]" but did not find it + +Also upstream: https://github.com/pytorch/pytorch/issues/146806 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py +index bafc781b591..60fc47f63e4 100644 +--- a/test/distributed/test_c10d_functional_native.py ++++ b/test/distributed/test_c10d_functional_native.py +@@ -997,7 +997,7 @@ class CompileTest(TestCase): + AOTIRunnerUtil.run(func, (arg,)) + torch.cuda.synchronize() + +- @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") ++ @unittest.skip("Fails") + @fresh_cache() + def test_inductor_all_gather_into_tensor_coalesced(self): + def func(args: list[torch.Tensor]) -> torch.Tensor: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch new file mode 100644 index 000000000000..88d176f6051c --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch @@ -0,0 +1,19 @@ +Skip test_pad_mm.py PadMMTest.test_original_aten_preserved_pad_mm failing on: +> File "/dev/shm/pytorch-v2.9.1/test/inductor/test_pad_mm.py", line 538, in test_original_aten_preserved_pad_mm +> self.assertEqual(counters["inductor"]["pattern_matcher_count"], 1) + +See https://github.com/pytorch/pytorch/issues/170562 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/inductor/test_pad_mm.py b/test/inductor/test_pad_mm.py +index 781f4588e14..b6f0fcebb3c 100644 +--- a/test/inductor/test_pad_mm.py ++++ b/test/inductor/test_pad_mm.py +@@ -508,6 +508,7 @@ class PadMMTest(TestCase): + + assert torch.allclose(res2, mm_expected_result), "MM results are not identical" + ++ @unittest.skip("Fails") + @fresh_cache() + @inductor_config.patch( + { diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_override-without-CUDA.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_override-without-CUDA.patch new file mode 100644 index 000000000000..bc2b927e0a0d --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_override-without-CUDA.patch @@ -0,0 +1,35 @@ +This test fails during creation of the tests at startup: +> File "/var/lib/jenkins/workspace/test/test_overrides.py", line 683, in _simple_type_parser +> return torch.Stream() +> RuntimeError: CUDA error: CUDA driver version is insufficient for CUDA runtime version + +See https://github.com/pytorch/pytorch/pull/166625 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/test_overrides.py b/test/test_overrides.py +index 8454677856d..8df233e279f 100644 +--- a/test/test_overrides.py ++++ b/test/test_overrides.py +@@ -9,9 +9,9 @@ import pprint + import pickle + import collections + import unittest +-import os ++import contextlib + +-from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF ++from torch.testing._internal.common_utils import TestCase, run_tests, TEST_CUDA, TEST_WITH_CROSSREF + from torch.overrides import ( + handle_torch_function, + has_torch_function, +@@ -30,8 +30,7 @@ from torch.utils._pytree import tree_map + + Tensor = torch.Tensor + +-if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"): +- # This test is not supported on ARM ++if not TEST_CUDA: + print( + "Skipping due to failing when cuda build runs on non cuda machine, " + + "see https://github.com/pytorch/pytorch/pull/150059 for example" diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_unbacked_reduction.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_unbacked_reduction.patch new file mode 100644 index 000000000000..bfb54615bf5e --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-test_unbacked_reduction.patch @@ -0,0 +1,18 @@ +TestInductorDynamicCPU.test_unbacked_reduction_cpu doesn't only fail on ROCM with: +> AssertionError: expected to fail, but actually passed + + +See https://github.com/pytorch/pytorch/issues/154217 + +Author: Alexander Grund (TU Dresden) + +--- a/test/inductor/test_torchinductor_dynamic_shapes.py ++++ b/test/inductor/test_torchinductor_dynamic_shapes.py +@@ -513,6 +513,7 @@ class TestInductorDynamic(TestCase): + ).sum().backward() + self.assertEqual(t.grad, expect) + ++ @unittest.skip("Fails on CPU") + @torch._dynamo.config.patch(capture_scalar_outputs=True) + def test_unbacked_reduction(self, device): + expect_fail = ( diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch new file mode 100644 index 000000000000..a4aadc780df0 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch @@ -0,0 +1,122 @@ +These tests use Triton to generate PTX code and then compile that with NVCC. + +As Triton 3.5 uses PTXAS from CUDA 12.8 it cannot be compiled with NVCC from CUDA 12.6. + +Failures look like: +> ptxas /tmp/torchinductor_s3248973/bvqcnu2o7/2mwinejhnbvqcnu2o73mk3zrx6.ptx, line 5; fatal : Unsupported .version 8.7; current version is '8.5' + +in following tests: +- test_simple_multi_arch +- test_compile_after_package_multi_arch +- test_compile_after_package_static +- test_compile_standalone_cos +- test_compile_with_exporter +- test_compile_with_exporter_weights + +See https://github.com/pytorch/pytorch/issues/168353 + +Author: Alexander Grund (TU Dresden) + +--- a/test/inductor/test_aot_inductor.py ++++ b/test/inductor/test_aot_inductor.py +@@ -39,7 +39,7 @@ from torch.export.pt2_archive._package import load_pt2 + from torch.testing import FileCheck + from torch.testing._internal import common_utils + from torch.testing._internal.common_cuda import ( +- _get_torch_cuda_version, ++ requires_triton_ptxas_compat, + PLATFORM_SUPPORTS_FLASH_ATTENTION, + PLATFORM_SUPPORTS_FP8, + PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, +@@ -239,9 +239,7 @@ class AOTInductorTestsTemplate: + # Skip embed_kernel_binary == True for now as it shows random + # failure on CI + @common_utils.parametrize("embed_kernel_binary", [False]) +- @unittest.skipIf( +- _get_torch_cuda_version() < (12, 6), "Test is only supported on CUDA 12.6+" +- ) ++ @requires_triton_ptxas_compat + def test_simple_multi_arch(self, embed_kernel_binary): + if self.device != GPU_TYPE: + raise unittest.SkipTest("requires GPU_TYPE") +diff --git a/test/inductor/test_aot_inductor_package.py b/test/inductor/test_aot_inductor_package.py +index 0eb1057c802..843f63ff17d 100644 +--- a/test/inductor/test_aot_inductor_package.py ++++ b/test/inductor/test_aot_inductor_package.py +@@ -27,7 +27,7 @@ from torch.export.pt2_archive._package import ( + load_pt2, + load_weights_to_pt2_contents, + ) +-from torch.testing._internal.common_cuda import _get_torch_cuda_version ++from torch.testing._internal.common_cuda import _get_torch_cuda_version, requires_triton_ptxas_compat + from torch.testing._internal.common_utils import ( + IS_FBCODE, + skipIfRocm, +@@ -319,9 +319,7 @@ class TestAOTInductorPackage(TestCase): + actual = optimized(*example_inputs) + self.assertTrue(torch.allclose(actual, expected)) + +- @unittest.skipIf( +- _get_torch_cuda_version() < (12, 6), "Test is only supported on CUDA 12.6+" +- ) ++ @requires_triton_ptxas_compat + @unittest.skipIf(IS_FBCODE, "cmake won't work in fbcode") + @skipIfRocm # doesn't support multi-arch binary + @skipIfXpu # doesn't support multi-arch binary +@@ -366,9 +364,7 @@ class TestAOTInductorPackage(TestCase): + actual = optimized(*example_inputs) + self.assertTrue(torch.allclose(actual, expected)) + +- @unittest.skipIf( +- _get_torch_cuda_version() < (12, 6), "Test is only supported on CUDA 12.6+" +- ) ++ @requires_triton_ptxas_compat + @unittest.skipIf(IS_FBCODE, "cmake won't work in fbcode") + @skipIfXpu # build system may be different + @torch._inductor.config.patch("test_configs.use_libtorch", True) +@@ -429,6 +425,7 @@ class TestAOTInductorPackage(TestCase): + self.cmake_compile(model, example_inputs, options, "") + + @unittest.skipIf(IS_FBCODE, "cmake won't work in fbcode") ++ @requires_triton_ptxas_compat + @skipIfXpu # build system may be different + @torch._inductor.config.patch("test_configs.use_libtorch", True) + def test_compile_standalone_cos(self): +@@ -461,9 +458,7 @@ class TestAOTInductorPackage(TestCase): + a_path = build_path / "libcos.a" + self.assertTrue(a_path.exists()) + +- @unittest.skipIf( +- _get_torch_cuda_version() < (12, 6), "Test is only supported on CUDA 12.6+" +- ) ++ @requires_triton_ptxas_compat + @unittest.skipIf(IS_FBCODE, "cmake won't work in fbcode") + @skipIfRocm # doesn't support multi-arch binary + @skipIfXpu # doesn't support multi-arch binary +@@ -519,9 +514,7 @@ class TestAOTInductorPackage(TestCase): + " 0 0 0\n 0 0 0\n[ CPUFloatType{3,3} ]\n", + ) + +- @unittest.skipIf( +- _get_torch_cuda_version() < (12, 6), "Test is only supported on CUDA 12.6+" +- ) ++ @requires_triton_ptxas_compat + @unittest.skipIf(IS_FBCODE, "cmake won't work in fbcode") + @skipIfRocm # doesn't support multi-arch binary + @skipIfXpu # doesn't support multi-arch binary +diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py +index be284429114..3bd0e0a904f 100644 +--- a/torch/testing/_internal/common_cuda.py ++++ b/torch/testing/_internal/common_cuda.py +@@ -373,6 +373,11 @@ def xfailIfSM120OrLater(func): + def xfailIfDistributedNotSupported(func): + return func if not (IS_MACOS or IS_JETSON) else unittest.expectedFailure(func) + ++# When using nvcc from the CUDA toolkit its versuib must be at least the one from ptxas bundled with Triton ++TRITON_PTXAS_VERSION = (12, 8) ++requires_triton_ptxas_compat = unittest.skipIf(torch.version.hip is None and _get_torch_cuda_version() < TRITON_PTXAS_VERSION, ++ "Requires CUDA 12.8 to match Tritons ptxas version") ++ + # Importing this module should NOT eagerly initialize CUDA + if not CUDA_ALREADY_INITIALIZED_ON_IMPORT: + assert not torch.cuda.is_initialized() diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch new file mode 100644 index 000000000000..3667657cc175 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch @@ -0,0 +1,104 @@ +Unexpected success in e.g. TestExportOpInfoCPU.test_fake_export___getitem___cpu_float32 + +Same with PYPI package and reported in https://github.com/pytorch/pytorch/pull/164166 + +Skip all instead of XFailing + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/export/test_export_opinfo.py b/test/export/test_export_opinfo.py +index 35d8b2895bd..409a305a3aa 100644 +--- a/test/export/test_export_opinfo.py ++++ b/test/export/test_export_opinfo.py +@@ -22,54 +22,54 @@ from torch.utils import _pytree as pytree + + # following are failing with regular torch.export.export + export_failures = { +- xfail("allclose"), +- xfail("combinations"), +- xfail("corrcoef"), +- xfail("cov"), +- xfail("equal"), +- xfail("linalg.lstsq"), +- xfail("linalg.lstsq", "grad_oriented"), +- xfail("nn.functional.ctc_loss"), +- xfail("nn.functional.gaussian_nll_loss"), +- xfail("sparse.sampled_addmm"), +- xfail("tensor_split"), ++ skip("allclose"), ++ skip("combinations"), ++ skip("corrcoef"), ++ skip("cov"), ++ skip("equal"), ++ skip("linalg.lstsq"), ++ skip("linalg.lstsq", "grad_oriented"), ++ skip("nn.functional.ctc_loss"), ++ skip("nn.functional.gaussian_nll_loss"), ++ skip("sparse.sampled_addmm"), ++ skip("tensor_split"), + } + + # following are failing fake export on cuda device + fake_export_failures = { +- xfail("geqrf"), +- xfail("histogram"), +- xfail("masked.amax"), +- xfail("masked.amin"), +- xfail("masked.argmax"), +- xfail("masked.argmin"), +- xfail("masked.logaddexp"), +- xfail("masked.logsumexp"), +- xfail("masked.mean"), +- xfail("masked.prod"), +- xfail("masked.std"), +- xfail("masked.sum"), +- xfail("masked.var"), +- xfail("nn.functional.grid_sample"), +- xfail("to_sparse"), ++ skip("geqrf"), ++ skip("histogram"), ++ skip("masked.amax"), ++ skip("masked.amin"), ++ skip("masked.argmax"), ++ skip("masked.argmin"), ++ skip("masked.logaddexp"), ++ skip("masked.logsumexp"), ++ skip("masked.mean"), ++ skip("masked.prod"), ++ skip("masked.std"), ++ skip("masked.sum"), ++ skip("masked.var"), ++ skip("nn.functional.grid_sample"), ++ skip("to_sparse"), + # cannot xfail as it is passing for cpu-only build + skip("nn.functional.conv2d"), + skip("nn.functional.scaled_dot_product_attention"), + # following are failing due to OptionalDeviceGuard +- xfail("__getitem__"), +- xfail("nn.functional.batch_norm"), +- xfail("nn.functional.instance_norm"), +- xfail("nn.functional.multi_margin_loss"), +- xfail("nonzero"), ++ skip("__getitem__"), ++ skip("nn.functional.batch_norm"), ++ skip("nn.functional.instance_norm"), ++ skip("nn.functional.multi_margin_loss"), ++ skip("nonzero"), + } + + fake_decomposition_failures = { +- xfail("linalg.matrix_rank"), +- xfail("nn.functional.binary_cross_entropy_with_logits"), +- xfail("nn.functional.instance_norm"), +- xfail("nn.functional.multi_margin_loss"), +- xfail("repeat_interleave"), +- xfail("take"), ++ skip("linalg.matrix_rank"), ++ skip("nn.functional.binary_cross_entropy_with_logits"), ++ skip("nn.functional.instance_norm"), ++ skip("nn.functional.multi_margin_loss"), ++ skip("repeat_interleave"), ++ skip("take"), + } + + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb new file mode 100644 index 000000000000..7e00bf62fe7e --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -0,0 +1,220 @@ +name = 'PyTorch' +version = '2.9.1' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2025b'} + +local_six_version = '1.11.0' +source_urls = [GITHUB_RELEASE] +sources = [ + '%(namelower)s-v%(version)s.tar.gz', + { + # Avoid downloading this during the build, see third_party/NNPACK/cmake/DownloadSix.cmake for the version + 'filename': f'six-{local_six_version}.tar.gz', + 'source_urls': [ + 'https://pypi.python.org/packages/16/d8/bc6316cf98419719bd59c91742194c111b6f2e85abac88e496adefaf7afe'], + } +] +patches = [ + 'PyTorch-1.12.1_add-hypothesis-suppression.patch', + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', + 'PyTorch-2.1.0_remove-test-requiring-online-access.patch', + 'PyTorch-2.6.0_show-test-duration.patch', + 'PyTorch-2.6.0_skip-test_segfault.patch', + 'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch', + 'PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch', + 'PyTorch-2.7.1_skip-test_data_parallel_rnn.patch', + 'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch', + 'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch', + 'PyTorch-2.7.1_skip-tests-requiring-SM90.patch', + 'PyTorch-2.7.1_suport-64bit-BARs.patch', + 'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch', + 'PyTorch-2.9.0_disable-test_nan_assert.patch', + 'PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch', + 'PyTorch-2.9.0_fix-attention-squeeze.patch', + 'PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch', + 'PyTorch-2.9.0_fix-nccl-test-env.patch', + 'PyTorch-2.9.0_fix-test_exclude_padding.patch', + 'PyTorch-2.9.0_fix-test_version_error.patch', + 'PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch', + 'PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch', + 'PyTorch-2.9.0_remove-faulty-close.patch', + 'PyTorch-2.9.0_revert-pybind11-3-change.patch', + 'PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch', + 'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch', + 'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch', + 'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch', + 'PyTorch-2.9.0_skip-test_override-without-CUDA.patch', + 'PyTorch-2.9.0_skip-test_unbacked_reduction.patch', + 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', + 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', + 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', +] +checksums = [ + {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, + {'six-1.11.0.tar.gz': '70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9'}, + {'PyTorch-1.12.1_add-hypothesis-suppression.patch': + 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, + {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, + {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch': + '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, + {'PyTorch-2.1.0_remove-test-requiring-online-access.patch': + '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, + {'PyTorch-2.6.0_show-test-duration.patch': '5508f2f9619204d9f3c356dbd4000a00d58f452ab2d64ae920eb8bc8b5484d75'}, + {'PyTorch-2.6.0_skip-test_segfault.patch': '26806bd62e6b61b56ebaa52d68ca44c415a28124f684bd2fb373557ada68ef52'}, + {'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch': + '2f3255e067f5c6f0d78b4fbce94784c41bddf3d01bab9673856b0d0bbc4e3fec'}, + {'PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch': + 'aaf22cb431357dc78e4db895d64febf1c7ee187e8ad27bd13544d011127354d4'}, + {'PyTorch-2.7.1_skip-test_data_parallel_rnn.patch': + 'aa85b678e89db4bb41d2c5f4990f0d05959be92e61918291cb5609685b7f1841'}, + {'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch': + '503030c3591196510a3c2d95db30b28a0b396adb8b50ff0d221f6bdb1f939935'}, + {'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch': + '709288abc802c9eb687c15f2677ebaf408d8325a4cb470d23cb72447ee0b8e13'}, + {'PyTorch-2.7.1_skip-tests-requiring-SM90.patch': + '7b5891a96b58d1d404c130233ec5ddbb0ad52afdb9c334bbe4e1f27f6c78ffd8'}, + {'PyTorch-2.7.1_suport-64bit-BARs.patch': '317c3d220aa87426d86e137a6c1a8f910adf9580ca0848371e0f6800c05dbde1'}, + {'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch': + 'f304440a57e00b8052a5ffbf285adad8d0fdc5a812a659420b59a20deb5a9942'}, + {'PyTorch-2.9.0_disable-test_nan_assert.patch': '98e9f98ce8fb89ae368739bc039be69040ed446a1c74ee5c2a1ef8ba60986c7d'}, + {'PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch': + 'ba4032b967c0393c916a26fb2b117ba40670ae8e809cb34399a6379b4e523d72'}, + {'PyTorch-2.9.0_fix-attention-squeeze.patch': '8f040e74780cab391bb4c84f86390a13230e1a309ddf65db9900d9a1c66e1288'}, + {'PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch': + 'b696d7be8c55ff1ccf8731dccf119b8792cd9593eaff457f37e76114e52346d2'}, + {'PyTorch-2.9.0_fix-nccl-test-env.patch': '9326223c400262788734ec608f6134c5d240f4d5315a8d294179a28f885d6845'}, + {'PyTorch-2.9.0_fix-test_exclude_padding.patch': + '349850874fb75d57a24437d871a4994a773e501632ce66a2adca613380a152dc'}, + {'PyTorch-2.9.0_fix-test_version_error.patch': 'b10bb10d0a353e4ba7dbef28ca5fef03a8ba552896e1982708aa90ab6f24f34f'}, + {'PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch': '239631258431174e4aed8947ae6096e003a3213bfbfa112cd0cdebae89469164'}, + {'PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch': + 'c27ab34900835c2a15edc26d481343a16433bfa52f635a80cbab252c1320a545'}, + {'PyTorch-2.9.0_remove-faulty-close.patch': '32ca744d68dcfa669e46ced9d2776af3dcc380dd9c3458ba7c1c432e5c5295b3'}, + {'PyTorch-2.9.0_revert-pybind11-3-change.patch': + '5289894011fefc67482b1e19c9d1c502e94a943fc7a2d5ed5a6a1eaf444570a0'}, + {'PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch': + '85e236431d1a5da3fb7fccc2554640898c29f5fab46a41d15b3ab61dd1f924fc'}, + {'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch': + '704750c7cc08b58779907d608cd4b7505043e394fb27530b16d72a0dc27c277e'}, + {'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch': + '644153d4c1d8267c0631df2902a6dfe8ec2a197f3374f2a2f5654e6bd0edc05e'}, + {'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch': + 'ac9e05d296cd5ff938a44662cd022efcc8133c744ca82b045c6a15bc64f67cf4'}, + {'PyTorch-2.9.0_skip-test_override-without-CUDA.patch': + '967512d1487bf1ad06982cc5b976c0b38ba062c3f3473cb4542c4b9ac0740662'}, + {'PyTorch-2.9.0_skip-test_unbacked_reduction.patch': + 'b51dd5d7c9cfeed946cbc5c7fc22f2e78e1fa52dda55569b957c20ca4ed01fe8'}, + {'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch': + '6d79aff5291627b86d8fea025bf2379e4065c7d9cbef5cf83452c35922848728'}, + {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': + '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, + {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': + '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.31.8'), + ('hypothesis', '6.136.6'), + # For tests + ('parameterized', '0.9.0'), + ('pytest-flakefinder', '1.1.0'), + ('pytest-rerunfailures', '16.1'), + ('pytest-shard', '0.1.2'), + ('pytest-subtests', '0.15.0'), + ('tlparse', '0.4.3'), + ('optree', '0.18.0'), + ('unittest-xml-reporting', '3.2.0'), +] + +dependencies = [ + ('CUDA', '12.9.1', '', SYSTEM), + # PyTorch is very sensitive to the NCCL & cuDNN versions. (Maybe the same for cuSPARSELt) + # Prefer those (listed per CUDA version) in + # https://github.com/pytorch/pytorch/blob/main/.github/scripts/generate_binary_build_matrix.py + # or https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_cuda.sh + ('NCCL', '2.27.7', '-CUDA-%(cudaver)s'), + ('cuDNN', '9.15.0.57', '-CUDA-%(cudaver)s', SYSTEM), + ('magma', '2.9.0', '-CUDA-%(cudaver)s'), + ('cuSPARSELt', '0.8.0.4', '-CUDA-%(cudaver)s', SYSTEM), + ('Triton', '3.5.0', '-CUDA-%(cudaver)s'), + ('Ninja', '1.13.0'), + ('Python', '3.13.5'), + ('Python-bundle-PyPI', '2025.07'), + ('expecttest', '0.3.0'), + ('GMP', '6.3.0'), + ('MPFR', '4.2.2'), + ('networkx', '3.5'), + ('numactl', '2.0.19'), + ('Pillow', '11.3.0'), + ('protobuf-python', '6.31.1'), + ('protobuf', '31.1'), + ('pybind11', '3.0.0'), + ('PuLP', '3.3.0'), + ('PyYAML', '6.0.2'), + ('pyzstd', '0.19.0'), + ('SciPy-bundle', '2025.07'), + ('sympy', '1.14.0'), + ('Z3', '4.15.1'), +] + +prebuildopts = (f"""sed -i '1i set(PYTHON_SIX_SOURCE_DIR "%(builddir)s/six-{local_six_version}")' """ + "cmake/Dependencies.cmake && ") +buildcmd = '%(python)s setup.py build' # Run the (long) build in the build step + +excluded_tests = { + '': [ + # This test seems to take too long on NVIDIA Ampere at least. + 'distributed/test_distributed_spawn', + # no xdoctest + 'doctests', + # intermittent failures on various systems + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'distributed/rpc/test_tensorpipe_agent', + # This test is expected to fail when run in their CI, but won't in our case. + # It just checks for a "CI" env variable + 'test_ci_sanity_check_fail', + # Requires pwlf Python package + 'distributed/_tools/test_sac_ilp', 'distributed/_tools/test_sac_estimator', + # 9 failures in H100, 7 are present in PYPI package, 2 are related to GC in Python < 3.12.4 + 'dynamo/test_dynamic_shapes', + # Broken test: https://github.com/pytorch/pytorch/issues/162179 + 'distributed/_composable/fsdp/test_fully_shard_logging', + # Broken: https://github.com/pytorch/pytorch/issues/137027 + 'inductor/test_extension_backend', + # Requires optional Python packages + 'test_public_bindings', + # 1 Failure and not important + 'dynamo/test_utils', + # Packaging test only, not important for us + 'test_license', + ] +} + +runtest = ( + ' TORCH_DISABLE_ADDR2LINE=1' + ' TORCHINDUCTOR_CUTLASS_DIR=%(start_dir)s/third_party/cutlass' + ' PYTEST_ADDOPTS=--full-trace' + ' PYTHONUNBUFFERED=1' + ' %(python)s test/run_test.py' + ' --continue-through-error --pipe-logs --verbose' + ' %(excluded_tests)s' +) + +postinstallcmds = [ + "mkdir %(installdir)s/extra", + "cp -r third_party/cutlass %(installdir)s/extra/", +] + +modextrapaths = {'TORCHINDUCTOR_CUTLASS_DIR': 'extra/cutlass'} + +tests = ['PyTorch-check-cpp-extension.py', 'PyTorch-check-cutlass.py'] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-RingFlexAttentionTest.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-RingFlexAttentionTest.patch new file mode 100644 index 000000000000..7855d55ddafd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-RingFlexAttentionTest.patch @@ -0,0 +1,23 @@ +test_ring_flex_attention and test_ring_flex_attention_mask both fail in similar ways: + +> torch._dynamo.exc.Unsupported: Attempted to call function marked as skipped +> ... +> Developer debug context: module: _warnings, qualname: warn, skip reason: + +See https://github.com/pytorch/pytorch/pull/161667#issuecomment-3298676991 + & https://github.com/pytorch/pytorch/issues/162843 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/tensor/test_attention.py b/test/distributed/tensor/test_attention.py +index a2543d443e4..a28fb45e992 100644 +--- a/test/distributed/tensor/test_attention.py ++++ b/test/distributed/tensor/test_attention.py +@@ -531,6 +531,7 @@ def generate_doc_mask_mod( + return doc_mask_mod + + ++@unittest.skip("FAILS") + class RingFlexAttentionTest(DTensorTestBase): + @property + def world_size(self) -> int: diff --git a/easybuild/easyconfigs/p/parameterized/parameterized-0.9.0-GCCcore-14.3.0.eb b/easybuild/easyconfigs/p/parameterized/parameterized-0.9.0-GCCcore-14.3.0.eb new file mode 100644 index 000000000000..c9f5dbfe09f4 --- /dev/null +++ b/easybuild/easyconfigs/p/parameterized/parameterized-0.9.0-GCCcore-14.3.0.eb @@ -0,0 +1,18 @@ +easyblock = 'PythonPackage' + +name = 'parameterized' +version = '0.9.0' + +homepage = 'https://github.com/wolever/parameterized' +description = " Parameterized testing with any Python test framework " + +toolchain = {'name': 'GCCcore', 'version': '14.3.0'} + +sources = [SOURCE_TAR_GZ] +checksums = ['7fc905272cefa4f364c1a3429cbbe9c0f98b793988efb5bf90aac80f08db09b1'] + +builddependencies = [('binutils', '2.44')] + +dependencies = [('Python', '3.13.5')] + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.15.0-GCCcore-14.3.0.eb b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.15.0-GCCcore-14.3.0.eb new file mode 100644 index 000000000000..8e3dac2e1a4d --- /dev/null +++ b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.15.0-GCCcore-14.3.0.eb @@ -0,0 +1,22 @@ +easyblock = 'PythonPackage' + +name = 'pytest-subtests' +version = '0.15.0' + +homepage = 'https://github.com/pytest-dev/pytest-subtests' +description = "unittest subTest() support and subtests fixture." + +toolchain = {'name': 'GCCcore', 'version': '14.3.0'} + +sources = ['pytest_subtests-%(version)s.tar.gz'] +checksums = ['cb495bde05551b784b8f0b8adfaa27edb4131469a27c339b80fd8d6ba33f887c'] + +builddependencies = [ + ('binutils', '2.44'), +] +dependencies = [ + ('Python', '3.13.5'), + ('Python-bundle-PyPI', '2025.07'), +] + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/u/unittest-xml-reporting/unittest-xml-reporting-3.2.0-GCCcore-14.3.0.eb b/easybuild/easyconfigs/u/unittest-xml-reporting/unittest-xml-reporting-3.2.0-GCCcore-14.3.0.eb new file mode 100644 index 000000000000..bb0f2e3510e9 --- /dev/null +++ b/easybuild/easyconfigs/u/unittest-xml-reporting/unittest-xml-reporting-3.2.0-GCCcore-14.3.0.eb @@ -0,0 +1,23 @@ +easyblock = 'PythonPackage' + +name = 'unittest-xml-reporting' +version = '3.2.0' + +homepage = 'http://github.com/xmlrunner/unittest-xml-reporting' +description = """A unittest test runner that can save test results to XML files in xUnit format. +The files can be consumed by a wide range of tools, such as build systems, IDEs and continuous integration servers.""" + +toolchain = {'name': 'GCCcore', 'version': '14.3.0'} + +sources = [SOURCE_TAR_GZ] +checksums = ['edd8d3170b40c3a81b8cf910f46c6a304ae2847ec01036d02e9c0f9b85762d28'] + +builddependencies = [('binutils', '2.44')] +dependencies = [ + ('Python', '3.13.5'), + ('lxml', '6.0.0'), +] + +options = {'modulename': 'xmlrunner'} + +moduleclass = 'tools' From 5e96b2513a2f048acc8edff9d3338226f4079b48 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 09:57:57 +0100 Subject: [PATCH 02/30] Add testcase --- .../p/PyTorch/PyTorch-check-cutlass.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100755 easybuild/easyconfigs/p/PyTorch/PyTorch-check-cutlass.py diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-check-cutlass.py b/easybuild/easyconfigs/p/PyTorch/PyTorch-check-cutlass.py new file mode 100755 index 000000000000..73d9951b78ac --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-check-cutlass.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +# Verify that PyTorch can load CUTLASS, required for the CUTLASS inductor backend +# Author: Alexander Grund (TU Dresden) + +import os +import tempfile +from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass, config + +# Isolate from default path used +os.environ['TORCHINDUCTOR_CACHE_DIR'] = tempfile.mkdtemp(suffix='inductor_cache') +# Use empty working directory +os.chdir(tempfile.mkdtemp(suffix='cwd')) + + +if try_import_cutlass(): + print(f"CUTLASS is set up using {config.cuda.cutlass_dir}") +else: + raise RuntimeError("CUTLASS is NOT working") From 9f728f67d23a4971c5ad3472cc65436a056f37cc Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 13:50:12 +0100 Subject: [PATCH 03/30] Add patch for GCC 14 ARM builds --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 +++ ...e-warning-incompatible-pointer-types.patch | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 7e00bf62fe7e..5fdbdb910c18 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -53,6 +53,7 @@ patches = [ 'PyTorch-2.9.0_skip-test_unbacked_reduction.patch', 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', + 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', ] checksums = [ @@ -114,6 +115,8 @@ checksums = [ '6d79aff5291627b86d8fea025bf2379e4065c7d9cbef5cf83452c35922848728'}, {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, + {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': + '7e62576f7f2b4b7c023ad9d59ec5aef09e9bf5a7b78a0e5990956567eed85f73'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, ] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch new file mode 100644 index 000000000000..a9340f035361 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch @@ -0,0 +1,19 @@ +Silence a warning that fails builds with GCC 14, especially in XNNPACK. +See https://github.com/pytorch/pytorch/pull/166873 + +Applied more broadly as we don't care about warnings anyway. + +Author: Alexander Grund (TU Dresden) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index ce7890f002d..eb9e7a682c6 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1040,6 +1040,7 @@ if(NOT MSVC) + append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-strict-aliasing" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS) ++ append_cxx_flag_if_supported("-Wno-incompatible-pointer-types" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS) From 54f0441482ebe4492534a30304cf9b2c7d5545cd Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 14:27:09 +0100 Subject: [PATCH 04/30] Also ignore warning for C files --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- ...e-warning-incompatible-pointer-types.patch | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 5fdbdb910c18..803a79e1ec37 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -116,7 +116,7 @@ checksums = [ {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': - '7e62576f7f2b4b7c023ad9d59ec5aef09e9bf5a7b78a0e5990956567eed85f73'}, + '59c84af01a76afd5462f4286de3898630f23645ee813a4da366ca7fbf5d8065d'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, ] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch index a9340f035361..e2ced872720e 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch @@ -6,14 +6,18 @@ Applied more broadly as we don't care about warnings anyway. Author: Alexander Grund (TU Dresden) diff --git a/CMakeLists.txt b/CMakeLists.txt -index ce7890f002d..eb9e7a682c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -1040,6 +1040,7 @@ if(NOT MSVC) - append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-strict-aliasing" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS) -+ append_cxx_flag_if_supported("-Wno-incompatible-pointer-types" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS) +@@ -1056,6 +1056,12 @@ if(NOT MSVC) + string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") + endif() + ++ if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "14") ++ foreach(xnn_tgt IN ITEMS XNNPACK microkernels-prod microkernels-all) ++ string(APPEND CMAKE_CXX_FLAGS " -Wno-incompatible-pointer-types") ++ string(APPEND CMAKE_C_FLAGS " -Wno-incompatible-pointer-types") ++ endforeach() ++ endif() + # Use ld.gold if available, fall back to ld.bfd (the default ld) if not + if(USE_GOLD_LINKER) + if(USE_DISTRIBUTED AND USE_MPI) From b9213995ae201e7411d58d92befb56473d49b994 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 15:19:35 +0100 Subject: [PATCH 05/30] Move flags setting before including dependencies --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- ...e-warning-incompatible-pointer-types.patch | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 803a79e1ec37..8eb5345d5145 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -116,7 +116,7 @@ checksums = [ {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': - '59c84af01a76afd5462f4286de3898630f23645ee813a4da366ca7fbf5d8065d'}, + 'd6a3fc21de154f54ba6504f8bd4b2eca5d05bc3d1ef3fd8eb57a9e167f852eed'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, ] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch index e2ced872720e..b9231002baee 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch @@ -8,16 +8,16 @@ Author: Alexander Grund (TU Dresden) diff --git a/CMakeLists.txt b/CMakeLists.txt --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -1056,6 +1056,12 @@ if(NOT MSVC) - string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") - endif() +@@ -852,6 +852,12 @@ if(MSVC) + append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS) + endif() -+ if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "14") -+ foreach(xnn_tgt IN ITEMS XNNPACK microkernels-prod microkernels-all) -+ string(APPEND CMAKE_CXX_FLAGS " -Wno-incompatible-pointer-types") -+ string(APPEND CMAKE_C_FLAGS " -Wno-incompatible-pointer-types") -+ endforeach() -+ endif() - # Use ld.gold if available, fall back to ld.bfd (the default ld) if not - if(USE_GOLD_LINKER) - if(USE_DISTRIBUTED AND USE_MPI) ++if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "14") ++ string(APPEND CMAKE_CXX_FLAGS " -Wno-incompatible-pointer-types") ++ string(APPEND CMAKE_C_FLAGS " -Wno-incompatible-pointer-types") ++endif() ++ ++ + # Note for ROCM platform: 1. USE_ROCM is always ON until + # include(cmake/Dependencies.cmake) 2. USE_CUDA will become OFF during + # re-configuration Truth Table: CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, From dd7464f0903ebdcf215664e337d32d4a0b1ab61b Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 16:35:37 +0100 Subject: [PATCH 06/30] Use flag only for C --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- ...Torch-2.9.1_ignore-warning-incompatible-pointer-types.patch | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 8eb5345d5145..40d8eb7029e4 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -116,7 +116,7 @@ checksums = [ {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': - 'd6a3fc21de154f54ba6504f8bd4b2eca5d05bc3d1ef3fd8eb57a9e167f852eed'}, + 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, ] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch index b9231002baee..cebc1478b59f 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch @@ -8,12 +8,11 @@ Author: Alexander Grund (TU Dresden) diff --git a/CMakeLists.txt b/CMakeLists.txt --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -852,6 +852,12 @@ if(MSVC) +@@ -852,6 +852,11 @@ if(MSVC) append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS) endif() +if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "14") -+ string(APPEND CMAKE_CXX_FLAGS " -Wno-incompatible-pointer-types") + string(APPEND CMAKE_C_FLAGS " -Wno-incompatible-pointer-types") +endif() + From 54b64effc3e916572da5e11812c19462a3281241 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 19 Dec 2025 17:05:38 +0100 Subject: [PATCH 07/30] Add workaround for GCC 14 ICE --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 + ...yTorch-2.9.1_workaround-GCC-14-error.patch | 136 ++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 40d8eb7029e4..8ceb58c33711 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -55,6 +55,7 @@ patches = [ 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', + 'PyTorch-2.9.1_workaround-GCC-14-error.patch', ] checksums = [ {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, @@ -119,6 +120,8 @@ checksums = [ 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, + {'PyTorch-2.9.1_workaround-GCC-14-error.patch': + '27f5ccee07cdb5ffe134a7a50de0608a6eb8723684eb0fa5dbdba6590137bcbb'}, ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch new file mode 100644 index 000000000000..d0f7cc80f15b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch @@ -0,0 +1,136 @@ +From d6237721c074484ea5e72fc05614587886e57fd6 Mon Sep 17 00:00:00 2001 +From: Nikita Shulga +Date: Tue, 8 Jul 2025 18:47:20 -0700 +Subject: [PATCH] [Build] Make PyTorch compilable with gcc-14 on ARM (#157867) + +Fixes numerous ICEs in vreg allocations for SVE+BF16 +``` +/pytorch/aten/src/ATen/ParallelOpenMP.h:25:9: error: unrecognizable insn: + 25 | #pragma omp parallel + | ^~~ +(insn 257 256 258 30 (set (reg:VNx8BF 449 [ bf16_vec1_217 ]) + (unspec:VNx8BF [ + (reg:VNx8BF 455) + (reg:VNx8BF 456) + ] UNSPEC_IORF)) "/pytorch/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h":228:31 discrim 1 -1 + (nil)) +during RTL pass: vregs +/pytorch/aten/src/ATen/ParallelOpenMP.h:25:9: internal compiler error: in extract_insn, at recog.cc:2812 +0xd73c33 internal_error(char const*, ...) + ???:0 +0xd73d1f fancy_abort(char const*, int, char const*) + ???:0 +0x890053 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*) + ???:0 +0x890087 _fatal_insn_not_found(rtx_def const*, char const*, int, char const*) + ???:0 +0x1379093 extract_insn(rtx_insn*) + ???:0 + +``` +And one in RTL-expand pass while compiling Activation.cpp +``` +during RTL pass: expand +In file included from /pytorch/aten/src/ATen/native/cpu/Activation.cpp:12, + from /pytorch/build/aten/src/ATen/native/cpu/Activation.cpp.DEFAULT.cpp:1: +/pytorch/aten/src/ATen/native/cpu/Activation.cpp: In lambda function: +/pytorch/aten/src/ATen/native/cpu/Activation.cpp:94:7: internal compiler error: Segmentation fault + 94 | }); + | ^ +/pytorch/aten/src/ATen/Dispatch.h:201:7: note: in definition of macro 'AT_DISPATCH_SWITCH' + 201 | __VA_ARGS__ \ + | ^~~~~~~~~~~ +/pytorch/aten/src/ATen/Dispatch.h:72:3: note: in expansion of macro 'AT_PRIVATE_CASE_TYPE_USING_HINT' + 72 | AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/pytorch/aten/src/ATen/Dispatch.h:214:3: note: in expansion of macro 'AT_DISPATCH_CASE' + 214 | AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \ + | ^~~~~~~~~~~~~~~~ +/pytorch/aten/src/ATen/Dispatch.h:218:34: note: in expansion of macro 'AT_DISPATCH_CASE_FLOATING_TYPES' + 218 | AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/pytorch/aten/src/ATen/native/cpu/Activation.cpp:70:5: note: in expansion of macro 'AT_DISPATCH_FLOATING_TYPES' + 70 | AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&] { + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +0xd73c33 internal_error(char const*, ...) + ???:0 +0x134f987 rebuild_jump_labels(rtx_insn*) + ???:0 +``` + +Interestingly enough, attempt to compile `Unfold2d.cpp` for `-march=armv8-a+sve` (i.e. without sve+bf16) support also causes ICE +``` +/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:221:1: error: unrecognizable insn: + 221 | } + | ^ +(insn 2918 2917 2919 296 (set (reg:VNx8BI 5917) + (unspec:VNx16BI [ + (reg:VNx8BI 5920) + (reg:VNx8BI 5922) + (const_vector:VNx4BI [ + (const_int 0 [0]) repeated x8 + ]) + ] UNSPEC_TRN1_CONV)) "/usr/include/aarch64-linux-gnu/bits/string_fortified.h":29:33 discrim 1 -1 + (expr_list:REG_EQUAL (const_vector:VNx8BI [ + (const_int 1 [0x1]) repeated x9 + (const_int 0 [0]) + (const_int 1 [0x1]) repeated x2 + (const_int 0 [0]) repeated x4 + ]) + (nil))) +during RTL pass: vregs +``` + +Fixes https://github.com/pytorch/pytorch/issues/157842 + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/157867 + +diff --git a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h +index 7f05c2ad166f6..1632b595c4c22 100644 +--- a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h ++++ b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h +@@ -220,8 +220,12 @@ class Vectorized { + Vectorized le(const Vectorized& other) const; + }; + +-inline std::tuple, Vectorized> convert_bfloat16_float( +- const Vectorized& a) { ++#if defined(__GNUC__) && __GNUC__ == 14 ++// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE ++__attribute__((optimize("no-tree-vectorize"))) ++#endif ++inline std::tuple, Vectorized> ++convert_bfloat16_float(const Vectorized& a) { + static_assert( + Vectorized::size() == 2 * Vectorized::size()); + auto zero = svreinterpret_bf16_f32(svdup_n_f32(0.0f)); +diff --git a/aten/src/ATen/native/cpu/Activation.cpp b/aten/src/ATen/native/cpu/Activation.cpp +index 52d5383e60f32..00c9f4eb25348 100644 +--- a/aten/src/ATen/native/cpu/Activation.cpp ++++ b/aten/src/ATen/native/cpu/Activation.cpp +@@ -26,6 +26,10 @@ namespace at::native { + + namespace { + ++#if defined(__GNUC__) && __GNUC__ == 14 && defined(__aarch64__) && !defined(__ARM_FEATURE_SVE) ++// Workaround for gcc-14.2.0 ICE during RTL pass: expand when compiling for NEON ++__attribute__((optimize("no-tree-vectorize"))) ++#endif + static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const TensorBase &input) { + if (at::isReducedFloatingType(input.scalar_type())) { + AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&]() { +diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp +index 8ef0741e77af0..8c94decfff023 100644 +--- a/aten/src/ATen/native/cpu/Unfold2d.cpp ++++ b/aten/src/ATen/native/cpu/Unfold2d.cpp +@@ -169,6 +169,10 @@ static void unfolded2d_acc_channels_last( + + /* note: due to write issues, this one cannot be parallelized as well as + * unfolded2d_copy */ ++#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16) ++// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16 ++__attribute__((optimize("no-tree-vectorize"))) ++#endif + void unfolded2d_acc_kernel( + ScalarType dtype, + void *finput_data, From bf271b172bac43f93fa4204ace50d95c3de6de5f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 5 Jan 2026 13:38:43 +0100 Subject: [PATCH 08/30] Remove already included patch --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 6 +- ...yTorch-2.9.1_workaround-GCC-14-error.patch | 136 ------------------ 2 files changed, 3 insertions(+), 139 deletions(-) delete mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 8ceb58c33711..874c6dce2d85 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -31,6 +31,7 @@ patches = [ 'PyTorch-2.7.1_skip-test_data_parallel_rnn.patch', 'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch', 'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch', + 'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch', 'PyTorch-2.7.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.7.1_suport-64bit-BARs.patch', 'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch', @@ -55,7 +56,6 @@ patches = [ 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', - 'PyTorch-2.9.1_workaround-GCC-14-error.patch', ] checksums = [ {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, @@ -79,6 +79,8 @@ checksums = [ '503030c3591196510a3c2d95db30b28a0b396adb8b50ff0d221f6bdb1f939935'}, {'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch': '709288abc802c9eb687c15f2677ebaf408d8325a4cb470d23cb72447ee0b8e13'}, + {'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch': + '4916a256b2b9914e4fdb930681b80df93ea561ddee2fc9978c4973a5650be5e9'}, {'PyTorch-2.7.1_skip-tests-requiring-SM90.patch': '7b5891a96b58d1d404c130233ec5ddbb0ad52afdb9c334bbe4e1f27f6c78ffd8'}, {'PyTorch-2.7.1_suport-64bit-BARs.patch': '317c3d220aa87426d86e137a6c1a8f910adf9580ca0848371e0f6800c05dbde1'}, @@ -120,8 +122,6 @@ checksums = [ 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, - {'PyTorch-2.9.1_workaround-GCC-14-error.patch': - '27f5ccee07cdb5ffe134a7a50de0608a6eb8723684eb0fa5dbdba6590137bcbb'}, ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch deleted file mode 100644 index d0f7cc80f15b..000000000000 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_workaround-GCC-14-error.patch +++ /dev/null @@ -1,136 +0,0 @@ -From d6237721c074484ea5e72fc05614587886e57fd6 Mon Sep 17 00:00:00 2001 -From: Nikita Shulga -Date: Tue, 8 Jul 2025 18:47:20 -0700 -Subject: [PATCH] [Build] Make PyTorch compilable with gcc-14 on ARM (#157867) - -Fixes numerous ICEs in vreg allocations for SVE+BF16 -``` -/pytorch/aten/src/ATen/ParallelOpenMP.h:25:9: error: unrecognizable insn: - 25 | #pragma omp parallel - | ^~~ -(insn 257 256 258 30 (set (reg:VNx8BF 449 [ bf16_vec1_217 ]) - (unspec:VNx8BF [ - (reg:VNx8BF 455) - (reg:VNx8BF 456) - ] UNSPEC_IORF)) "/pytorch/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h":228:31 discrim 1 -1 - (nil)) -during RTL pass: vregs -/pytorch/aten/src/ATen/ParallelOpenMP.h:25:9: internal compiler error: in extract_insn, at recog.cc:2812 -0xd73c33 internal_error(char const*, ...) - ???:0 -0xd73d1f fancy_abort(char const*, int, char const*) - ???:0 -0x890053 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*) - ???:0 -0x890087 _fatal_insn_not_found(rtx_def const*, char const*, int, char const*) - ???:0 -0x1379093 extract_insn(rtx_insn*) - ???:0 - -``` -And one in RTL-expand pass while compiling Activation.cpp -``` -during RTL pass: expand -In file included from /pytorch/aten/src/ATen/native/cpu/Activation.cpp:12, - from /pytorch/build/aten/src/ATen/native/cpu/Activation.cpp.DEFAULT.cpp:1: -/pytorch/aten/src/ATen/native/cpu/Activation.cpp: In lambda function: -/pytorch/aten/src/ATen/native/cpu/Activation.cpp:94:7: internal compiler error: Segmentation fault - 94 | }); - | ^ -/pytorch/aten/src/ATen/Dispatch.h:201:7: note: in definition of macro 'AT_DISPATCH_SWITCH' - 201 | __VA_ARGS__ \ - | ^~~~~~~~~~~ -/pytorch/aten/src/ATen/Dispatch.h:72:3: note: in expansion of macro 'AT_PRIVATE_CASE_TYPE_USING_HINT' - 72 | AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__) - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/pytorch/aten/src/ATen/Dispatch.h:214:3: note: in expansion of macro 'AT_DISPATCH_CASE' - 214 | AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \ - | ^~~~~~~~~~~~~~~~ -/pytorch/aten/src/ATen/Dispatch.h:218:34: note: in expansion of macro 'AT_DISPATCH_CASE_FLOATING_TYPES' - 218 | AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/pytorch/aten/src/ATen/native/cpu/Activation.cpp:70:5: note: in expansion of macro 'AT_DISPATCH_FLOATING_TYPES' - 70 | AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&] { - | ^~~~~~~~~~~~~~~~~~~~~~~~~~ -0xd73c33 internal_error(char const*, ...) - ???:0 -0x134f987 rebuild_jump_labels(rtx_insn*) - ???:0 -``` - -Interestingly enough, attempt to compile `Unfold2d.cpp` for `-march=armv8-a+sve` (i.e. without sve+bf16) support also causes ICE -``` -/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:221:1: error: unrecognizable insn: - 221 | } - | ^ -(insn 2918 2917 2919 296 (set (reg:VNx8BI 5917) - (unspec:VNx16BI [ - (reg:VNx8BI 5920) - (reg:VNx8BI 5922) - (const_vector:VNx4BI [ - (const_int 0 [0]) repeated x8 - ]) - ] UNSPEC_TRN1_CONV)) "/usr/include/aarch64-linux-gnu/bits/string_fortified.h":29:33 discrim 1 -1 - (expr_list:REG_EQUAL (const_vector:VNx8BI [ - (const_int 1 [0x1]) repeated x9 - (const_int 0 [0]) - (const_int 1 [0x1]) repeated x2 - (const_int 0 [0]) repeated x4 - ]) - (nil))) -during RTL pass: vregs -``` - -Fixes https://github.com/pytorch/pytorch/issues/157842 - -Pull Request resolved: https://github.com/pytorch/pytorch/pull/157867 - -diff --git a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h -index 7f05c2ad166f6..1632b595c4c22 100644 ---- a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h -+++ b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h -@@ -220,8 +220,12 @@ class Vectorized { - Vectorized le(const Vectorized& other) const; - }; - --inline std::tuple, Vectorized> convert_bfloat16_float( -- const Vectorized& a) { -+#if defined(__GNUC__) && __GNUC__ == 14 -+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE -+__attribute__((optimize("no-tree-vectorize"))) -+#endif -+inline std::tuple, Vectorized> -+convert_bfloat16_float(const Vectorized& a) { - static_assert( - Vectorized::size() == 2 * Vectorized::size()); - auto zero = svreinterpret_bf16_f32(svdup_n_f32(0.0f)); -diff --git a/aten/src/ATen/native/cpu/Activation.cpp b/aten/src/ATen/native/cpu/Activation.cpp -index 52d5383e60f32..00c9f4eb25348 100644 ---- a/aten/src/ATen/native/cpu/Activation.cpp -+++ b/aten/src/ATen/native/cpu/Activation.cpp -@@ -26,6 +26,10 @@ namespace at::native { - - namespace { - -+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__aarch64__) && !defined(__ARM_FEATURE_SVE) -+// Workaround for gcc-14.2.0 ICE during RTL pass: expand when compiling for NEON -+__attribute__((optimize("no-tree-vectorize"))) -+#endif - static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const TensorBase &input) { - if (at::isReducedFloatingType(input.scalar_type())) { - AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&]() { -diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp -index 8ef0741e77af0..8c94decfff023 100644 ---- a/aten/src/ATen/native/cpu/Unfold2d.cpp -+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp -@@ -169,6 +169,10 @@ static void unfolded2d_acc_channels_last( - - /* note: due to write issues, this one cannot be parallelized as well as - * unfolded2d_copy */ -+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16) -+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16 -+__attribute__((optimize("no-tree-vectorize"))) -+#endif - void unfolded2d_acc_kernel( - ScalarType dtype, - void *finput_data, From 5e4e0336d4b99bb86954351f28c3bcd3902e90a4 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 5 Jan 2026 15:05:53 +0100 Subject: [PATCH 09/30] Add missing patch --- ...skip-test_outside_linear_module_free.patch | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_outside_linear_module_free.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_outside_linear_module_free.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_outside_linear_module_free.patch new file mode 100644 index 000000000000..79bdea43a4d1 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-test_outside_linear_module_free.patch @@ -0,0 +1,26 @@ +Test failing with PYPI package too: +> self.assertTrue(cleared) +> AssertionError: False is not true + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py +index 7541bd3b9d8..d0cb310bec6 100644 +--- a/test/dynamo/test_misc.py ++++ b/test/dynamo/test_misc.py +@@ -10992,6 +10992,7 @@ fn + lambda mod: mod, + ) + ++ @unittest.skip("Unreliable") + def test_outside_linear_module_free(self): + # Compared to test_linear_module_free, the linear + # layer is not the code object that is directly compiled. +@@ -11026,6 +11026,7 @@ fn + gc.collect() + self.assertTrue(cleared) + ++ @unittest.skip("Unreliable") + def test_parameter_free(self): + def model_inp_ctr(): + param = torch.nn.Parameter(torch.randn(100, 100)) From 20c68d3272c7a7e93bd567565c472c9cd4c50d23 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Jan 2026 17:49:17 +0100 Subject: [PATCH 10/30] Skip tests requiring CUDA SM 9.0 --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 6 +- ...orch-2.9.1_skip-tests-requiring-SM90.patch | 85 +++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-SM90.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 874c6dce2d85..dc399aaa34ca 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -32,7 +32,6 @@ patches = [ 'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch', 'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch', 'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch', - 'PyTorch-2.7.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.7.1_suport-64bit-BARs.patch', 'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch', 'PyTorch-2.9.0_disable-test_nan_assert.patch', @@ -55,6 +54,7 @@ patches = [ 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', + 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', ] checksums = [ @@ -81,8 +81,6 @@ checksums = [ '709288abc802c9eb687c15f2677ebaf408d8325a4cb470d23cb72447ee0b8e13'}, {'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch': '4916a256b2b9914e4fdb930681b80df93ea561ddee2fc9978c4973a5650be5e9'}, - {'PyTorch-2.7.1_skip-tests-requiring-SM90.patch': - '7b5891a96b58d1d404c130233ec5ddbb0ad52afdb9c334bbe4e1f27f6c78ffd8'}, {'PyTorch-2.7.1_suport-64bit-BARs.patch': '317c3d220aa87426d86e137a6c1a8f910adf9580ca0848371e0f6800c05dbde1'}, {'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch': 'f304440a57e00b8052a5ffbf285adad8d0fdc5a812a659420b59a20deb5a9942'}, @@ -120,6 +118,8 @@ checksums = [ '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, + {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': + '7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, ] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-SM90.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-SM90.patch new file mode 100644 index 000000000000..4dea63b7e5fd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-SM90.patch @@ -0,0 +1,85 @@ +Avoid test_intra_node_comm_all_reduce failing on e.g. A100: + +> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] RuntimeError: CUDA error: device-side assert triggered... +> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] exiting process 1 with exit code: 10 +> ... +> :318: st_vec: block: [0,0,0], thread: [87,0,0] Assertion `false` failed. +> /pytorch-v2.7.1/torch/csrc/distributed/c10d/CUDASymmetricMemory-inl.h:318: st_vec: block: [0,0,0], thread: [88,0,0] Assertion `false` failed. + +test_fused_all_gather_scaled_matmul fails with a NCCL error due to FP8 usage and hangs forever. +See https://github.com/pytorch/pytorch/issues/171796 + +test_fused_scaled_matmul_reduce_scatter fails with +> RuntimeError: torch._scaled_mm is only supported on CUDA devices with compute capability >= 9.0 or 8.9, or ROCm MI300+ + + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py +index 0a0f3ee4ca2..07702566fd8 100644 +--- a/test/distributed/test_c10d_nccl.py ++++ b/test/distributed/test_c10d_nccl.py +@@ -3350,7 +3350,7 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): + @runOnRocmArch(MI300_ARCH) + def test_intra_node_comm_all_reduce(self): + from torch._C._distributed_c10d import _get_intra_node_comm_usage_counter +- from torch.testing._internal.common_cuda import SM80OrLater ++ from torch.testing._internal.common_cuda import SM90OrLater + + for peer in range(self.world_size): + if peer == self.rank: +@@ -3358,8 +3358,8 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): + if not torch._C._cuda_canDeviceAccessPeer(self.rank, peer): + raise SkipTest("Test requires p2p access") + +- if not SM80OrLater: +- raise SkipTest("Test requires sm>=80") ++ if not SM90OrLater: ++ raise SkipTest("Test requires sm>=90") + + store = c10d.FileStore(self.file_name, self.world_size) + os.environ["ENABLE_INTRA_NODE_COMM"] = "1" +diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py +index eeeb24bec30..9d55b620840 100644 +--- a/test/distributed/test_symmetric_memory.py ++++ b/test/distributed/test_symmetric_memory.py +@@ -4,7 +4,7 @@ import itertools + import os + import random + from contextlib import nullcontext +-from unittest import skip, skipIf ++from unittest import skip, skipIf, skipUnless + + import torch + import torch.distributed as dist +@@ -22,7 +22,7 @@ from torch.distributed._symmetric_memory import ( + restride_A_for_fused_matmul_reduce_scatter, + restride_A_shard_for_fused_all_gather_matmul, + ) +-from torch.testing._internal.common_cuda import _get_torch_cuda_version, SM90OrLater ++from torch.testing._internal.common_cuda import _get_torch_cuda_version, SM90OrLater, IS_SM89 + from torch.testing._internal.common_device_type import e4m3_type + from torch.testing._internal.common_distributed import ( + MultiProcContinuousTest, +@@ -399,6 +399,10 @@ class AsyncTPTest(MultiProcContinuousTest): + + @runOnRocmArch(MI300_ARCH) + @skip_if_lt_x_gpu(2) ++ @skipIf( ++ not SM90OrLater, ++ "_fused_all_gather_scaled_matmul_fallback w/ FP8 only supports sm>=90", ++ ) + @parametrize("gather_dim", [0, 1]) + @parametrize( + "scale_mode", ["tensor-wise", "row-wise-replicated", "row-wise-sharded"] +@@ -512,6 +516,10 @@ class AsyncTPTest(MultiProcContinuousTest): + + @skipIfRocm # AsyncTP support changed _fused_scaled_matmul_reduce_scatter_fallback API, need more changes + @skip_if_lt_x_gpu(2) ++ @skipUnless( ++ SM90OrLater or IS_SM89, ++ "torch._scaled_mm (from fused_scaled_matmul_reduce_scatter) only supports sm>=90 or 8.9", ++ ) + @parametrize("scatter_dim", [0, 1]) + @parametrize("rowwise", [True, False]) + def test_fused_scaled_matmul_reduce_scatter( From dc3a09e18a919e595aae89447e77b3c67aace8a0 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 7 Jan 2026 17:53:42 +0100 Subject: [PATCH 11/30] Remove old patch --- ...orch-2.7.1_skip-tests-requiring-SM90.patch | 34 ------------------- 1 file changed, 34 deletions(-) delete mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch deleted file mode 100644 index ee60c76ddbcf..000000000000 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.7.1_skip-tests-requiring-SM90.patch +++ /dev/null @@ -1,34 +0,0 @@ -Avoid it failing on e.g. A100: - -> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] RuntimeError: CUDA error: device-side assert triggered... -> [rank1]:E1022 09:55:08.823000 3580472 torch/testing/_internal/common_distributed.py:721] exiting process 1 with exit code: 10 -> ... -> :318: st_vec: block: [0,0,0], thread: [87,0,0] Assertion `false` failed. -> /pytorch-v2.7.1/torch/csrc/distributed/c10d/CUDASymmetricMemory-inl.h:318: st_vec: block: [0,0,0], thread: [88,0,0] Assertion `false` failed. - -Author: Alexander Grund (TU Dresden) - -diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py -index 7410255d27a..603ea0b375b 100644 ---- a/test/distributed/test_c10d_nccl.py -+++ b/test/distributed/test_c10d_nccl.py -@@ -3367,7 +3367,7 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): - @skip_if_rocm_multiprocess - def test_intra_node_comm_all_reduce(self): - from torch._C._distributed_c10d import _get_intra_node_comm_usage_counter -- from torch.testing._internal.common_cuda import SM80OrLater -+ from torch.testing._internal.common_cuda import SM90OrLater - - for peer in range(self.world_size): - if peer == self.rank: -@@ -3375,8 +3375,8 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase): - if not torch._C._cuda_canDeviceAccessPeer(self.rank, peer): - raise SkipTest("Test requires p2p access") - -- if not SM80OrLater: -- raise SkipTest("Test requires sm>=80") -+ if not SM90OrLater: -+ raise SkipTest("Test requires sm>=90") - - store = c10d.FileStore(self.file_name, self.world_size) - os.environ["ENABLE_INTRA_NODE_COMM"] = "1" From 39cf857b523cd86acf32437219fec97ae9638fd6 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 8 Jan 2026 14:15:35 +0100 Subject: [PATCH 12/30] Add patch avoiding infinite test hang --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 ++ ...d-multiprocess-tests-hanging-forever.patch | 32 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index dc399aaa34ca..7a3774727a9b 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -53,6 +53,7 @@ patches = [ 'PyTorch-2.9.0_skip-test_unbacked_reduction.patch', 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', + 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', @@ -116,6 +117,8 @@ checksums = [ '6d79aff5291627b86d8fea025bf2379e4065c7d9cbef5cf83452c35922848728'}, {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, + {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': + '2ce000ce59ad4157c10382ecceac263de7debab9a4db6cda5bf95038e84d0215'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch new file mode 100644 index 000000000000..468bc2f088e9 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch @@ -0,0 +1,32 @@ +A crashed child process in a test might cause the parent to never complete. +Use a timeout to avoid that. +See https://github.com/pytorch/pytorch/pull/171972 + +Author: Alexander Grund (TU Dresden) + +diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py +index c1f75697fe8..47661c7a1fa 100644 +--- a/torch/testing/_internal/common_distributed.py ++++ b/torch/testing/_internal/common_distributed.py +@@ -1786,8 +1786,19 @@ class MultiProcContinuousTest(TestCase): + if self.rank == self.MAIN_PROCESS_RANK: + logger.debug(f"Waiting for workers to finish {self.id()}") # noqa: G004 + # Wait for the workers to finish the test +- for i, completion_queue in enumerate(self.completion_queues): +- rv = completion_queue.get() ++ for i, (p, completion_queue) in enumerate( ++ zip(self.processes, self.completion_queues) ++ ): ++ # When the process died before filling the completion queue `get` will never return. ++ # Hence periodically check the process for liveness ++ while True: ++ try: ++ rv = completion_queue.get(timeout=120) ++ except queue.Empty: ++ # If not alive do a last check because the timeout might have happened just before completion ++ if not p.is_alive() and completion_queue.empty(): ++ rv = RuntimeError(f"Exited with {p.exitcode}") ++ break + if isinstance(rv, BaseException): + # Hit an exception, re-raise it in the main process. + logger.warning( From caa6bf062a9c2244d8e647896eaf5043638f9648 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 15 Jan 2026 13:30:25 +0100 Subject: [PATCH 13/30] Add patch avoiding infinite test hang --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- ...d-multiprocess-tests-hanging-forever.patch | 47 ++++++++++++++----- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 7a3774727a9b..125b236f078e 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -118,7 +118,7 @@ checksums = [ {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': - '2ce000ce59ad4157c10382ecceac263de7debab9a4db6cda5bf95038e84d0215'}, + '16994db6586f213cc627b9ef141fa8a03877e3975f4aa0b87931f46ed8d03c87'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch index 468bc2f088e9..71fc46e7ddbe 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch @@ -8,7 +8,41 @@ diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_inte index c1f75697fe8..47661c7a1fa 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py -@@ -1786,8 +1786,19 @@ class MultiProcContinuousTest(TestCase): +@@ -621,6 +621,33 @@ def cleanup_temp_dir() -> None: + tmp_dir.cleanup() + + ++def retrieve_result_from_process_queue( ++ process: torch.multiprocessing.Process, ++ queue: torch.multiprocessing.Queue, ++ timeout: Optional[int] = None, ++) -> Any: ++ """Get result from queue associated with process. ++ ++ When the process finished without putting a result or the timeout expired an exception instance will be returned""" ++ queue_timeout = 120 if timeout is None else max(10, min(120, timeout // 4)) ++ start_time = time.time() ++ # Periodically check the process for liveness ++ while True: ++ try: ++ return queue.get(timeout=queue_timeout) ++ except queue.Empty: ++ # If not alive do a last check because the timeout might have happened just before completion ++ if not process.is_alive() and queue.empty(): ++ # Clean up process to avoid keeping a zombie process ++ process.terminate() # Just to be sure ++ process.join(600) # Usually completes immediately ++ return RuntimeError(f"Exited with {process.exitcode}") ++ if timeout is not None: ++ elapsed = time.time() - start_time ++ if elapsed > timeout: ++ return RuntimeError(f"Process timeout out after {elapsed}s") ++ ++ + # Most tests operate with this worldsize + DEFAULT_WORLD_SIZE = 4 + +@@ -1786,8 +1813,10 @@ class MultiProcContinuousTest(TestCase): if self.rank == self.MAIN_PROCESS_RANK: logger.debug(f"Waiting for workers to finish {self.id()}") # noqa: G004 # Wait for the workers to finish the test @@ -17,16 +51,7 @@ index c1f75697fe8..47661c7a1fa 100644 + for i, (p, completion_queue) in enumerate( + zip(self.processes, self.completion_queues) + ): -+ # When the process died before filling the completion queue `get` will never return. -+ # Hence periodically check the process for liveness -+ while True: -+ try: -+ rv = completion_queue.get(timeout=120) -+ except queue.Empty: -+ # If not alive do a last check because the timeout might have happened just before completion -+ if not p.is_alive() and completion_queue.empty(): -+ rv = RuntimeError(f"Exited with {p.exitcode}") -+ break ++ rv = retrieve_result_from_process_queue(p, completion_queue) if isinstance(rv, BaseException): # Hit an exception, re-raise it in the main process. logger.warning( From 0a6dde086e990670d3bbbcd316aaf7d3a781e0e3 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 22 Jan 2026 16:16:33 +0100 Subject: [PATCH 14/30] Add patch avoiding infinite test hang --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- ...rch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 125b236f078e..ce385c49753a 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -118,7 +118,7 @@ checksums = [ {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': - '16994db6586f213cc627b9ef141fa8a03877e3975f4aa0b87931f46ed8d03c87'}, + '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch index 71fc46e7ddbe..75e8fa00ca00 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch @@ -14,7 +14,7 @@ index c1f75697fe8..47661c7a1fa 100644 +def retrieve_result_from_process_queue( + process: torch.multiprocessing.Process, -+ queue: torch.multiprocessing.Queue, ++ completion_queue: torch.multiprocessing.Queue, + timeout: Optional[int] = None, +) -> Any: + """Get result from queue associated with process. @@ -25,10 +25,10 @@ index c1f75697fe8..47661c7a1fa 100644 + # Periodically check the process for liveness + while True: + try: -+ return queue.get(timeout=queue_timeout) ++ return completion_queue.get(timeout=queue_timeout) + except queue.Empty: + # If not alive do a last check because the timeout might have happened just before completion -+ if not process.is_alive() and queue.empty(): ++ if not process.is_alive() and completion_queue.empty(): + # Clean up process to avoid keeping a zombie process + process.terminate() # Just to be sure + process.join(600) # Usually completes immediately From f093e09e2997b9f8a42772413dafd9ea2fd206a7 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 22 Jan 2026 17:26:44 +0100 Subject: [PATCH 15/30] More patches --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 9 +++ ...yTorch-2.9.1_fix-hypothesis-deadline.patch | 67 +++++++++++++++++++ ....9.1_fix-iteration-in-fligh-reporter.patch | 17 +++++ ...orch-2.9.1_fix-test_dist2-decorators.patch | 64 ++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-hypothesis-deadline.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index ce385c49753a..4f1aac40e4d9 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -54,6 +54,9 @@ patches = [ 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', + 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', + 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', + 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', @@ -119,6 +122,12 @@ checksums = [ '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, + {'PyTorch-2.9.1_fix-hypothesis-deadline.patch': + 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, + {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': + 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, + {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': + '118e4b275aaa27c9b51d533cb2a83d74d8fc2754fed22fb30c23ba8227c03608'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-hypothesis-deadline.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-hypothesis-deadline.patch new file mode 100644 index 000000000000..c526ea336c1d --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-hypothesis-deadline.patch @@ -0,0 +1,67 @@ +The assertion at the bottom sometimes fails. + +From c4312b443fed1fd8e0e28dfe049ce61226936e99 Mon Sep 17 00:00:00 2001 +From: FFFrog +Date: Thu, 25 Sep 2025 16:32:19 +0800 +Subject: [PATCH] [Tools] Adapting the Hypothesis library (version 5.x) for use + with the PyTorch framework (#163748) + +Starting from version 5.x, the Hypothesis library removed the timeout setting and only retained the deadline. +Pull Request resolved: https://github.com/pytorch/pytorch/pull/163748 +Approved by: https://github.com/albanD, https://github.com/Skylion007 +--- + torch/testing/_internal/hypothesis_utils.py | 24 +++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/torch/testing/_internal/hypothesis_utils.py b/torch/testing/_internal/hypothesis_utils.py +index f02ef4c9e04b0..a00e1e1a048a0 100644 +--- a/torch/testing/_internal/hypothesis_utils.py ++++ b/torch/testing/_internal/hypothesis_utils.py +@@ -7,6 +7,7 @@ + + import hypothesis + from functools import reduce ++from importlib.metadata import version + from hypothesis import assume + from hypothesis import settings + from hypothesis import strategies as st +@@ -346,22 +347,33 @@ def tensor_conv( + + return X, W, b, groups, tr + ++ + # We set the deadline in the currently loaded profile. + # Creating (and loading) a separate profile overrides any settings the user + # already specified. +-hypothesis_version = hypothesis.version.__version_info__ +-current_settings = settings._profiles[settings._current_profile].__dict__ +-current_settings['deadline'] = None +-if hypothesis_version >= (3, 16, 0) and hypothesis_version < (5, 0, 0): +- current_settings['timeout'] = hypothesis.unlimited ++hypothesis_version = tuple(map(int, version("hypothesis").split(".")[:3])) ++ ++if (3, 16, 0) <= hypothesis_version < (3, 27, 0): ++ # Hypothesis 3.16 → 3.26: use `timeout` instead of `deadline` ++ settings.register_profile("no_deadline", timeout=hypothesis.unlimited) ++else: ++ # Hypothesis >=3.27: use `deadline=None` ++ settings.register_profile("no_deadline", deadline=None) ++ ++# Activate the profile ++settings.load_profile("no_deadline") ++ ++ + def assert_deadline_disabled(): ++ """Check that deadlines are effectively disabled across Hypothesis versions.""" + if hypothesis_version < (3, 27, 0): + import warnings ++ + warning_message = ( + "Your version of hypothesis is outdated. " + "To avoid `DeadlineExceeded` errors, please update. " + f"Current hypothesis version: {hypothesis.__version__}" + ) +- warnings.warn(warning_message) ++ warnings.warn(warning_message, stacklevel=2) + else: + assert settings().deadline is None diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch new file mode 100644 index 000000000000..3ff313cbe125 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch @@ -0,0 +1,17 @@ +Avoid an error caused by modifying dict while iterating it. + +Author: Alexander Grund (TU Dresden) + +diff --git a/tools/flight_recorder/components/types.py b/tools/flight_recorder/components/types.py +index 20e093688ba..98192aeb92c 100644 +--- a/tools/flight_recorder/components/types.py ++++ b/tools/flight_recorder/components/types.py +@@ -164,7 +164,7 @@ class Database(NamedTuple): + # TODO: We need to add a schema for the following + types = [ + TypeInfo.from_type(t) # type: ignore[type-var] +- for t in globals().values() ++ for t in list(globals().values()) + if ( + isinstance(t, type) + and issubclass(t, tuple) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch new file mode 100644 index 000000000000..9d98770ae698 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch @@ -0,0 +1,64 @@ +The requires_gloo/requires_nccl decorator cause the function to just return. +In the way they are used this skips the initialization done by a helper function. +So the test is not skipped and then fails due to missing variables. + +Decorate the class instead. + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/test_dist2.py b/test/distributed/test_dist2.py +index b335eff1c21..ff5a1e8c028 100644 +--- a/test/distributed/test_dist2.py ++++ b/test/distributed/test_dist2.py +@@ -256,10 +256,10 @@ class Dist2MultiProcessTestCase(MultiProcessTestCase): + self.assertEqual(merged_pg.group_name, "merged_pg") + + ++@requires_gloo() + class ProcessGroupGlooTest(Dist2MultiProcessTestCase): + device = torch.device("cpu") + +- @requires_gloo() + def new_group(self) -> torch.distributed.ProcessGroup: + os.environ["RANK"] = str(self.rank) + os.environ["WORLD_SIZE"] = str(self.world_size) +@@ -273,8 +273,8 @@ class ProcessGroupGlooTest(Dist2MultiProcessTestCase): + ) + + ++@requires_nccl() + class ProcessGroupNCCLTest(Dist2MultiProcessTestCase): +- @requires_nccl() + @skip_if_lt_x_gpu(2) + def new_group(self) -> torch.distributed.ProcessGroup: + os.environ["RANK"] = str(self.rank) +diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py +index c1f75697fe8..d513510d955 100644 +--- a/torch/testing/_internal/common_distributed.py ++++ b/torch/testing/_internal/common_distributed.py +@@ -330,11 +330,7 @@ def with_dist_debug_levels(levels): + return decorator + + +-def requires_gloo(): +- return skip_but_pass_in_sandcastle_if( +- not c10d.is_gloo_available(), +- "c10d was not compiled with the Gloo backend", +- ) ++requires_gloo = unittest.skipUnless(c10d.is_gloo_available(), "c10d was not compiled with the Gloo backend") + + + def requires_nccl_version(version, msg): +@@ -360,11 +356,7 @@ def requires_nccl_version(version, msg): + return decorator + + +-def requires_nccl(): +- return skip_but_pass_in_sandcastle_if( +- not c10d.is_nccl_available(), +- "c10d was not compiled with the NCCL backend", +- ) ++requires_nccl = unittest.skipUnless(c10d.is_nccl_available(), "c10d was not compiled with the NCCL backend") + + + def requires_ucc(): From 0ecb16a372ad5ef0922fb11d912e5d63af2ff06c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 5 Feb 2026 14:21:56 +0100 Subject: [PATCH 16/30] Fix patched skip markers --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- .../PyTorch-2.9.1_fix-test_dist2-decorators.patch | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 4f1aac40e4d9..da13a6d52ccf 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -127,7 +127,7 @@ checksums = [ {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': - '118e4b275aaa27c9b51d533cb2a83d74d8fc2754fed22fb30c23ba8227c03608'}, + 'bf4ed805f00775ed33351de7bce40ebf4eac16aff6c61d2e91790982bc43d73b'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch index 9d98770ae698..fffd633b4511 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_dist2-decorators.patch @@ -36,29 +36,27 @@ diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_inte index c1f75697fe8..d513510d955 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py -@@ -330,11 +330,7 @@ def with_dist_debug_levels(levels): - return decorator +@@ -331,10 +331,7 @@ def with_dist_debug_levels(levels): --def requires_gloo(): + def requires_gloo(): - return skip_but_pass_in_sandcastle_if( - not c10d.is_gloo_available(), - "c10d was not compiled with the Gloo backend", - ) -+requires_gloo = unittest.skipUnless(c10d.is_gloo_available(), "c10d was not compiled with the Gloo backend") ++ return unittest.skipUnless(c10d.is_gloo_available(), "c10d was not compiled with the Gloo backend") def requires_nccl_version(version, msg): -@@ -360,11 +356,7 @@ def requires_nccl_version(version, msg): - return decorator +@@ -361,10 +358,7 @@ def requires_nccl_version(version, msg): --def requires_nccl(): + def requires_nccl(): - return skip_but_pass_in_sandcastle_if( - not c10d.is_nccl_available(), - "c10d was not compiled with the NCCL backend", - ) -+requires_nccl = unittest.skipUnless(c10d.is_nccl_available(), "c10d was not compiled with the NCCL backend") ++ return unittest.skipUnless(c10d.is_nccl_available(), "c10d was not compiled with the NCCL backend") def requires_ucc(): From 3d7005bad65022bc12664809df9ce7b652832269 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 5 Feb 2026 14:24:39 +0100 Subject: [PATCH 17/30] Add comment for DISABLE_ADDR2LINE --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 1 + 1 file changed, 1 insertion(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index da13a6d52ccf..b4f5b6369c1b 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -217,6 +217,7 @@ excluded_tests = { } runtest = ( + # Disable symbol resolution in stack traces that can cause hangs and slowdowns ' TORCH_DISABLE_ADDR2LINE=1' ' TORCHINDUCTOR_CUTLASS_DIR=%(start_dir)s/third_party/cutlass' ' PYTEST_ADDOPTS=--full-trace' From 26ab819296606d1fec755b02d37c96631e9c6aa5 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 9 Feb 2026 15:52:11 +0100 Subject: [PATCH 18/30] Set test timeout --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 4 ++++ .../PyTorch-2.9.1_set-test-timeout.patch | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_set-test-timeout.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index b4f5b6369c1b..e908a1984eec 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -58,6 +58,8 @@ patches = [ 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', + 'PyTorch-2.9.1_set-test-timeout.patch', + 'PyTorch-2.9.1_skip-bool-bessel-tests.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', ] @@ -130,6 +132,8 @@ checksums = [ 'bf4ed805f00775ed33351de7bce40ebf4eac16aff6c61d2e91790982bc43d73b'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, + {'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'}, + {'PyTorch-2.9.1_skip-bool-bessel-tests.patch': '9c07cddaf4c1b17fe9a54874f10b8edbfb85785c40ac1e3aea11c7f4b5abca69'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': '7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_set-test-timeout.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_set-test-timeout.patch new file mode 100644 index 000000000000..6bfff62d3d15 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_set-test-timeout.patch @@ -0,0 +1,19 @@ +Some tests might hang forever and the default timeout will only be set when +a) --enable-timeout is passed, and +b) a `.additional_ci_files/test-times.json` exists at the root + +Manually set a timeout of 120min which should be enough for any single test. + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/run_test.py b/test/run_test.py +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -604,6 +604,7 @@ def run_test( + if is_cpp_test + else None + ) ++ timeout = 60 * 120 + print_to_stderr(f"Executing {command} ... [{datetime.now()}]") + + with ExitStack() as stack: From dae9b55850761fdaf9bfb93d3a262accba67998c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 10 Feb 2026 09:02:35 +0100 Subject: [PATCH 19/30] Add GCC 14 patch --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 5 ++ .../PyTorch-2.9.1_GCC14-ARM-workaround.patch | 53 +++++++++++++++++++ ...PyTorch-2.9.1_skip-bool-bessel-tests.patch | 50 +++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_GCC14-ARM-workaround.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-bool-bessel-tests.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index e908a1984eec..1e24641cb1fe 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -57,9 +57,11 @@ patches = [ 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', + 'PyTorch-2.9.1_GCC14-ARM-workaround.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', 'PyTorch-2.9.1_set-test-timeout.patch', 'PyTorch-2.9.1_skip-bool-bessel-tests.patch', + 'PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', ] @@ -130,10 +132,13 @@ checksums = [ 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': 'bf4ed805f00775ed33351de7bce40ebf4eac16aff6c61d2e91790982bc43d73b'}, + {'PyTorch-2.9.1_GCC14-ARM-workaround.patch': 'ea8a8662e20fae2fb3a74c7f8bf390aba80a598ab37f9131c720d25ebb14965d'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, {'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'}, {'PyTorch-2.9.1_skip-bool-bessel-tests.patch': '9c07cddaf4c1b17fe9a54874f10b8edbfb85785c40ac1e3aea11c7f4b5abca69'}, + {'PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch': + 'd6082e62696a38dbfbc87c228f7ccb54dba4cfc615ce158f1f3bf77e6e30ff4f'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': '7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_GCC14-ARM-workaround.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_GCC14-ARM-workaround.patch new file mode 100644 index 000000000000..e0504c90d06b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_GCC14-ARM-workaround.patch @@ -0,0 +1,53 @@ +From 8fd509399e25cb4b265dff663d3f777406001f2e Mon Sep 17 00:00:00 2001 +From: Nikita Shulga <2453524+malfet@users.noreply.github.com> +Date: Tue, 10 Feb 2026 04:35:39 +0000 +Subject: [PATCH] Blunter GCC 14.2.0 workaround for SVE compilation (#174647) + +Updated preprocessor directive for GCC version check and removed BF16 condition. I.e. right now SVE256 compilation with gcc-14.2 on Debian13 for ` -march=armv8-a+sve+bf16` + +Without the fix, compilation fails with +``` +In file included from /home/dev/git/pytorch/pytorch/build/aten/src/ATen/native/cpu/Unfold2d.cpp.SVE256.cpp:1: +/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp: In function 'void at::native::{anonymous}::unfolded2d_acc_kernel(c10::ScalarType, void*, void*, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, bool)': +/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: error: unrecognizable insn: + 225 | } + | ^ +(insn 1371 1370 1372 101 (set (reg:VNx16BI 3235) + (unspec:VNx16BI [ + (reg:VNx16BI 3232) + (reg:VNx8BI 3234) + (const_vector:VNx4BI [ + (const_int 0 [0]) repeated x8 + ]) + ] UNSPEC_TRN1_CONV)) "/home/dev/git/pytorch/pytorch/torch/headeronly/util/bit_cast.h":40:14 -1 + (nil)) +during RTL pass: vregs +/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: internal compiler error: in extract_insn, at recog.cc:2812 +``` + +Not sure what compelled me to put such a narrow restriction in https://github.com/pytorch/pytorch/pull/157867 + +Fixes https://github.com/pytorch/pytorch/issues/172630 + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/174647 +Approved by: https://github.com/seemethere +--- + aten/src/ATen/native/cpu/Unfold2d.cpp | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp +index ed69998e99f79..9ae1391e2603e 100644 +--- a/aten/src/ATen/native/cpu/Unfold2d.cpp ++++ b/aten/src/ATen/native/cpu/Unfold2d.cpp +@@ -169,8 +169,9 @@ void unfolded2d_acc_channels_last( + + /* note: due to write issues, this one cannot be parallelized as well as + * unfolded2d_copy */ +-#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16) +-// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16 ++#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) ++// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE ++// NS: With or without BF16, see https://github.com/pytorch/pytorch/issues/172630 + __attribute__((optimize("no-tree-vectorize"))) + #endif + void unfolded2d_acc_kernel( diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-bool-bessel-tests.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-bool-bessel-tests.patch new file mode 100644 index 000000000000..827601fa079e --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-bool-bessel-tests.patch @@ -0,0 +1,50 @@ +From 08de54f1ea954a6da3b45d794972d3df3d72df02 Mon Sep 17 00:00:00 2001 +From: Rob Timpe +Date: Thu, 13 Nov 2025 02:23:06 +0000 +Subject: [PATCH] [3.14] Skip failing spherical_bessel_j0 tests (#167691) + +Starting with scipy 1.15, bool inputs error out. +Pull Request resolved: https://github.com/pytorch/pytorch/pull/167691 +Approved by: https://github.com/williamwen42 +--- + .../_internal/opinfo/definitions/special.py | 20 +++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/torch/testing/_internal/opinfo/definitions/special.py b/torch/testing/_internal/opinfo/definitions/special.py +index f9dc471ca98aa..47cbcb1fb4268 100644 +--- a/torch/testing/_internal/opinfo/definitions/special.py ++++ b/torch/testing/_internal/opinfo/definitions/special.py +@@ -648,6 +648,16 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): + dtypes=all_types_and(torch.bool), + ref=lambda x: scipy.special.spherical_jn(0, x) if TEST_SCIPY else None, + supports_autograd=False, ++ skips=( ++ DecorateInfo( ++ unittest.skip( ++ "Scipy doesn't support bool inputs to spherical_bessel_j0" ++ ), ++ "TestUnaryUfuncs", ++ "test_reference_numerics_normal", ++ dtypes=(torch.bool,), ++ ), ++ ), + ), + ] + +@@ -768,6 +778,16 @@ def sample_inputs_erfcx(op_info, device, dtype, requires_grad, **kwargs): + } + ), + ), ++ skips=( ++ DecorateInfo( ++ unittest.skip( ++ "Scipy doesn't support bool inputs to spherical_bessel_j0" ++ ), ++ "TestUnaryUfuncs", ++ "test_reference_numerics_normal", ++ dtypes=(torch.bool,), ++ ), ++ ), + ), + # + # Elementwise Binary Special OpInfos From 0bb1f1a4d1ab1c6618e1e45b0945804f0351da3d Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 10 Feb 2026 09:14:03 +0100 Subject: [PATCH 20/30] Add missing patch --- ..._skip-test_norm_matrix_degenerate_shapes.patch | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch new file mode 100644 index 000000000000..b8437e2b5bbd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch @@ -0,0 +1,15 @@ +This test no longer works with numpy >= 2.3.0 +See https://github.com/pytorch/pytorch/commit/a4a5d03779d876043b0a1f0c565659fc2298afd2 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/test_linalg.py b/test/test_linalg.py +--- a/test/test_linalg.py ++++ b/test/test_linalg.py +@@ -2040,6 +2040,7 @@ class TestLinalg(TestCase): + run_test_case(input, ord, dim, keepdim) + + # Test degenerate shape results match numpy for linalg.norm matrix norms ++ @unittest.skipIf(np.lib.NumpyVersion(np.__version__) >= '2.3.0', 'Numpy changed handling of degenerate inputs in 2.3.0') + @skipCUDAIfNoMagma + @skipCPUIfNoLapack + @dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble) From c22bceea7a9d99adb103e76f26244ff0a79e518d Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 12 Feb 2026 16:35:25 +0100 Subject: [PATCH 21/30] Add patches for test fixes and skip slow&disabled tests --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 41 +++++++- ...9.1_check-device-avail-test_schedule.patch | 19 ++++ .../PyTorch-2.9.1_disable-slow-tests.patch | 40 ++++++++ ...-TestExportOpInfoCPU-with-single-GPU.patch | 20 ++++ ...fix-test_recursion_in_except_handler.patch | 34 +++++++ .../PyTorch-2.9.1_normalize_tree_output.patch | 24 +++++ ...ention-test_block_mask_non_divisible.patch | 17 ++++ ...-attention-tests-on-unsupported-cpus.patch | 97 +++++++++++++++++++ ...point_save_failure_continues_serving.patch | 28 ++++++ ...i_head_attention_forward_cpu_float32.patch | 12 +++ 10 files changed, 329 insertions(+), 3 deletions(-) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_check-device-avail-test_schedule.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_normalize_tree_output.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 1e24641cb1fe..06569e192f03 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -9,15 +9,24 @@ PyTorch is a deep learning framework that puts Python first.""" toolchain = {'name': 'foss', 'version': '2025b'} local_six_version = '1.11.0' +# This is specific to a (tagged) release. +# Extract from `get_disabled_tests` in tools/stats/import_test_stats.py +local_disabled_tests_S3_ID = 'UsscdNP.2GMOzUxAvqIx8GAj4MuhX1Xi' source_urls = [GITHUB_RELEASE] sources = [ '%(namelower)s-v%(version)s.tar.gz', + { + 'filename': '%(name)s-%(version)s-disabled-tests.json', + 'download_filename': f'disabled-tests-condensed.json?versionId={local_disabled_tests_S3_ID}', + 'source_urls': ['https://ossci-metrics.s3.amazonaws.com'], + # See `DEFAULT_DISABLED_TESTS_FILE` in torch/testing/_internal/common_utils.py + 'extract_cmd': 'cp %s %(builddir)s/pytorch-v%(version)s/test/.pytorch-disabled-tests.json', + }, { # Avoid downloading this during the build, see third_party/NNPACK/cmake/DownloadSix.cmake for the version 'filename': f'six-{local_six_version}.tar.gz', - 'source_urls': [ - 'https://pypi.python.org/packages/16/d8/bc6316cf98419719bd59c91742194c111b6f2e85abac88e496adefaf7afe'], - } + 'source_urls': ['https://pypi.python.org/packages/source/s/six'], + }, ] patches = [ 'PyTorch-1.12.1_add-hypothesis-suppression.patch', @@ -47,6 +56,7 @@ patches = [ 'PyTorch-2.9.0_revert-pybind11-3-change.patch', 'PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch', 'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch', + 'PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch', 'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch', 'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch', 'PyTorch-2.9.0_skip-test_override-without-CUDA.patch', @@ -54,19 +64,28 @@ patches = [ 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', + 'PyTorch-2.9.1_check-device-avail-test_schedule.patch', + 'PyTorch-2.9.1_disable-slow-tests.patch', 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', + 'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch', 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', + 'PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch', 'PyTorch-2.9.1_GCC14-ARM-workaround.patch', 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', + 'PyTorch-2.9.1_normalize_tree_output.patch', 'PyTorch-2.9.1_set-test-timeout.patch', 'PyTorch-2.9.1_skip-bool-bessel-tests.patch', + 'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch', + 'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch', + 'PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch', 'PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', ] checksums = [ {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, + {'PyTorch-2.9.1-disabled-tests.json': '471f8aa36e056173d09ffd421ead45539a8d35fec6e61a8a0050d92a5fcd9f04'}, {'six-1.11.0.tar.gz': '70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9'}, {'PyTorch-1.12.1_add-hypothesis-suppression.patch': 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, @@ -112,6 +131,8 @@ checksums = [ '85e236431d1a5da3fb7fccc2554640898c29f5fab46a41d15b3ab61dd1f924fc'}, {'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch': '704750c7cc08b58779907d608cd4b7505043e394fb27530b16d72a0dc27c277e'}, + {'PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch': + 'e57486cc42f3dbcae29753168febc251d070a283229e2d76ccbdf19fee53f06e'}, {'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch': '644153d4c1d8267c0631df2902a6dfe8ec2a197f3374f2a2f5654e6bd0edc05e'}, {'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch': @@ -126,17 +147,31 @@ checksums = [ '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, + {'PyTorch-2.9.1_check-device-avail-test_schedule.patch': + '64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'}, + {'PyTorch-2.9.1_disable-slow-tests.patch': '76e6d8f7366b91a0ddc65f73685f2b09988bb5537d10d294f9bb6a48c7fec3d0'}, {'PyTorch-2.9.1_fix-hypothesis-deadline.patch': 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, + {'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch': + 'bdddf5a9ba47d57ec96f4bbefc3b85c4904e44de93dc5c7a65bc03e343035ae9'}, {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': 'bf4ed805f00775ed33351de7bce40ebf4eac16aff6c61d2e91790982bc43d73b'}, + {'PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch': + 'e7a64dbdc202151c5bff6aac86d77b0f6e7c52dc3117e3bfe9b57ec1371f87ad'}, {'PyTorch-2.9.1_GCC14-ARM-workaround.patch': 'ea8a8662e20fae2fb3a74c7f8bf390aba80a598ab37f9131c720d25ebb14965d'}, {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, + {'PyTorch-2.9.1_normalize_tree_output.patch': '7d5994580339b73c28de595d9e5a0448db97b7d284f17efd18909e4613d170df'}, {'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'}, {'PyTorch-2.9.1_skip-bool-bessel-tests.patch': '9c07cddaf4c1b17fe9a54874f10b8edbfb85785c40ac1e3aea11c7f4b5abca69'}, + {'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch': + 'e544f765beac7bdb3fc0ada98a3f92fd7e511ed8874de085aa2f213cca769d40'}, + {'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch': + 'd8489c192da549083569e09e5f94d2a83c9e41e111b1322f86512a9c5a58c0d9'}, + {'PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch': + 'fa22d7ed5bf20afa4798c8af3ec732b1a3f530ecc4be5c223b3796e839b0b812'}, {'PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch': 'd6082e62696a38dbfbc87c228f7ccb54dba4cfc615ce158f1f3bf77e6e30ff4f'}, {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_check-device-avail-test_schedule.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_check-device-avail-test_schedule.patch new file mode 100644 index 000000000000..202d1e4a1fc7 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_check-device-avail-test_schedule.patch @@ -0,0 +1,19 @@ +Some tests fail if no accelerator is available. +> RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU [...] + +Check for availability to trigger CPU fallback. + +Author: Alexander Grund (TU Dresden) +diff --git a/test/distributed/pipelining/test_schedule.py b/test/distributed/pipelining/test_schedule.py +index dabf3d78a6f..d3b8bf13168 100644 +--- a/test/distributed/pipelining/test_schedule.py ++++ b/test/distributed/pipelining/test_schedule.py +@@ -53,7 +53,7 @@ from torch.testing._internal.distributed.fake_pg import FakeStore + + ARTIFACTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "artifacts") + +-device = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" ++device = acc.type if (acc := torch.accelerator.current_accelerator(check_available=True)) else "cpu" + logger = logging.getLogger(__name__) + torch.manual_seed(0) + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch new file mode 100644 index 000000000000..8f6d6e0c7677 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch @@ -0,0 +1,40 @@ +On CI it defaults to importing JSON files with slow and disabled tests. +Those are then skipped upon execution. + +Enable the default for non-CI environments to cut down testing time. +Don't check for SANDCASTLE when determining whether to skip disabled tests. +However, the disabled-tests JSON file needs to be downloaded from S3 and placed at "tests/.pytorch-disabled-tests.json". + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/run_test.py b/test/run_test.py +index 44a15d4ab2c..269d4206f3e 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -531,7 +531,7 @@ def run_test( + + # NB: These features are not available for C++ tests, but there is little incentive + # to implement it because we have never seen a flaky C++ test before. +- if IS_CI and not is_cpp_test: ++ if not is_cpp_test: + ci_args = ["--import-slow-tests", "--import-disabled-tests"] + if RERUN_DISABLED_TESTS: + ci_args.append("--rerun-disabled-tests") +diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py +index bfc568bc146..7ef37cccccb 100644 +--- a/torch/testing/_internal/common_utils.py ++++ b/torch/testing/_internal/common_utils.py +@@ -2722,11 +2722,11 @@ def check_if_enable(test: unittest.TestCase): + if not TEST_WITH_SLOW: + raise unittest.SkipTest("test is slow; run with PYTORCH_TEST_WITH_SLOW to enable test") + +- if not IS_SANDCASTLE: ++ if True: + should_skip = False + skip_msg = "" + +- for disabled_test, (issue_url, platforms) in disabled_tests_dict.items(): ++ for disabled_test, (pr_num, issue_url, platforms) in disabled_tests_dict.items(): + if matches_test(disabled_test): + platform_to_conditional: dict = { + "mac": IS_MACOS, diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch new file mode 100644 index 000000000000..ebdfb00e0a34 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch @@ -0,0 +1,20 @@ +Fixes a failure on systems with a single GPU. +Error in `init_gpu_context` (fake_tensor.py:744): +> E torch.AcceleratorError: CUDA error: invalid device ordinal + +See: https://github.com/pytorch/pytorch/pull/164184 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/export/test_export_opinfo.py b/test/export/test_export_opinfo.py +--- a/test/export/test_export_opinfo.py ++++ b/test/export/test_export_opinfo.py +@@ -79,7 +79,7 @@ def _test_export_helper(self, dtype, op): + mode = FakeTensorMode(allow_non_fake_inputs=True) + converter = mode.fake_tensor_converter + # intentionally avoid cuda:0 to flush out some bugs +- target_device = "cuda:1" ++ target_device = "cuda:0" + + def to_fake_device(x): + x = converter.from_real_tensor(mode, x) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch new file mode 100644 index 000000000000..3e807729cc56 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch @@ -0,0 +1,34 @@ +Fix a RecursionError inside pytest when running this test. +See https://github.com/pytorch/pytorch/pull/174693 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/dynamo/cpython/3_13/test_exceptions.py b/test/dynamo/cpython/3_13/test_exceptions.py +index 0ded70db3c7..bc8120a2d19 100644 +--- a/test/dynamo/cpython/3_13/test_exceptions.py ++++ b/test/dynamo/cpython/3_13/test_exceptions.py +@@ -1573,18 +1573,18 @@ class ExceptionTests(__TestCase): + recurse_in_body_and_except() + + recursionlimit = sys.getrecursionlimit() +- try: +- set_relative_recursion_limit(10) +- for func in (recurse_in_except, recurse_after_except, recurse_in_body_and_except): +- with self.subTest(func=func): ++ for func in (recurse_in_except, recurse_after_except, recurse_in_body_and_except): ++ with self.subTest(func=func): ++ try: ++ set_relative_recursion_limit(10) + try: + func() + except RecursionError: + pass + else: + self.fail("Should have raised a RecursionError") +- finally: +- sys.setrecursionlimit(recursionlimit) ++ finally: ++ sys.setrecursionlimit(recursionlimit) + + + @cpython_only diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_normalize_tree_output.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_normalize_tree_output.patch new file mode 100644 index 000000000000..4c708a216cbd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_normalize_tree_output.patch @@ -0,0 +1,24 @@ +Avoid failure in TestProfilerTree.test_profiler_experimental_tree_with_stack_and_modules +with diff: +> - +> + + +See https://github.com/pytorch/pytorch/pull/174768 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/profiler/test_profiler_tree.py b/test/profiler/test_profiler_tree.py +index 670e639c98e..e53fd93b273 100644 +--- a/test/profiler/test_profiler_tree.py ++++ b/test/profiler/test_profiler_tree.py +@@ -240,6 +240,11 @@ class TestProfilerTree(TestCase): + # simply coerce them into a platform independent form. If you made a + # change in the codebase which changes the trace produced, simply use + # EXPECTTEST_ACCEPT=1 to update the tests to reflect the new structure. ++ def normalize(tree): ++ return re.sub(r'of pybind11\w+ object at', 'of PyCapsule object at', tree) ++ ++ actual = normalize(actual) ++ expected = normalize(expected) + + # expecttest will not show the diff view if `len(actual) < len(expected)` + if not expecttest.ACCEPT: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch new file mode 100644 index 000000000000..5e26591c68cc --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch @@ -0,0 +1,17 @@ +This test shows segfaults, at least on some system. +PyTorch CI HUD indicates some failures with it are known. + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py +index 740faa0b375..ea5e311b7cd 100644 +--- a/test/inductor/test_flex_attention.py ++++ b/test/inductor/test_flex_attention.py +@@ -3474,6 +3474,7 @@ def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1): + ) + FileCheck().check("BLOCK_M : tl.constexpr = 16").run(code[0]) + ++ @unittest.skip("Segfaults on CPU") + @supported_platform + def test_block_mask_non_divisible(self, device): + seq = torch.arange(1023, device=device) // 128 diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch new file mode 100644 index 000000000000..a6ec831fb1c3 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch @@ -0,0 +1,97 @@ +FlexAttention is only supported on AVX2 CPUs. +However the tests are run on CPU unconditionally when CUDA devices are available leading to: +> torch._inductor.exc.InductorError: LoweringException: NotImplementedError: torch.compile on current platform is not supported for CPU. + +Add a condition to possibly only add CUDA tests. +See https://github.com/pytorch/pytorch/pull/174881 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py +index 740faa0b375..e698939d326 100644 +--- a/test/inductor/test_flex_attention.py ++++ b/test/inductor/test_flex_attention.py +@@ -48,6 +48,9 @@ from torch.testing._internal.common_device_type import ( + dtypesIfXPU, + flex_attention_supported_platform as supported_platform, + instantiate_device_type_tests, ++ IS_FLEX_ATTENTION_CPU_PLATFORM_SUPPORTED as TEST_ON_CPU, ++ IS_FLEX_ATTENTION_CUDA_PLATFORM_SUPPORTED as TEST_ON_CUDA, ++ IS_FLEX_ATTENTION_XPU_PLATFORM_SUPPORTED as TEST_ON_XPU, + largeTensorTest, + skipCPUIf, + skipCUDAIf, +@@ -177,25 +180,21 @@ class DeviceConfig: + dtypes_fast: list[torch.dtype] + + +-TEST_ON_CUDA = ( +- torch.cuda.is_available() +- and torch.utils._triton.has_triton() +- and torch.cuda.get_device_capability() >= (8, 0) +-) +-TEST_ON_XPU = torch.xpu.is_available() and torch.utils._triton.has_triton() +- + device_configs = {} ++# Tests are skipped when no device is supported, so CPU as default is safe ++test_device = ("cpu",) + if HAS_GPU: + if TEST_ON_CUDA: +- test_device = ( +- "cuda", +- "cpu", +- ) ++ if TEST_ON_CPU: ++ test_device = ( ++ "cuda", ++ "cpu", ++ ) ++ else: ++ test_device = ("cuda",) + elif TEST_ON_XPU: + torch._C._set_onednn_allow_tf32(True) + test_device = ("xpu",) +-else: +- test_device = ("cpu",) + + + class SubstringSet: +diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py +index 8971eca1bb2..6b14f9db105 100644 +--- a/torch/testing/_internal/common_device_type.py ++++ b/torch/testing/_internal/common_device_type.py +@@ -1972,23 +1972,25 @@ def get_all_device_types() -> list[str]: + + # skip since currently flex attention requires at least `avx2` support on CPU. + IS_FLEX_ATTENTION_CPU_PLATFORM_SUPPORTED = ( +- not torch.xpu.is_available() +- and not torch.cuda.is_available() +- and not IS_MACOS ++ not IS_MACOS + and torch.cpu._is_avx2_supported() + and os.getenv("ATEN_CPU_CAPABILITY") != "default" + ) + IS_FLEX_ATTENTION_XPU_PLATFORM_SUPPORTED = ( + torch.xpu.is_available() and torch.utils._triton.has_triton() + ) ++IS_FLEX_ATTENTION_CUDA_PLATFORM_SUPPORTED = ( ++ torch.cuda.is_available() ++ and torch.utils._triton.has_triton() ++ and torch.cuda.get_device_capability() >= (8, 0) ++) + flex_attention_supported_platform = unittest.skipUnless( + IS_FLEX_ATTENTION_XPU_PLATFORM_SUPPORTED +- or IS_FLEX_ATTENTION_CPU_PLATFORM_SUPPORTED +- or ( +- torch.cuda.is_available() +- and torch.utils._triton.has_triton() +- and torch.cuda.get_device_capability() >= (8, 0) +- ), ++ or (IS_FLEX_ATTENTION_CPU_PLATFORM_SUPPORTED ++ and not torch.xpu.is_available() ++ and not torch.cuda.is_available() ++ ) ++ or IS_FLEX_ATTENTION_CUDA_PLATFORM_SUPPORTED, + "Requires CUDA and Triton, Intel GPU and triton, or CPU with avx2 and later", + ) + if torch.version.hip and "gfx94" in torch.cuda.get_device_properties(0).gcnArchName: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch new file mode 100644 index 000000000000..31e2baaf9160 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch @@ -0,0 +1,28 @@ +The test fails with +> AssertionError: 'fail_once policy triggered failure' not found in 'cannot pickle code objects' + +This is caused by a change in Python 3.13 although it only worked by accident in earlier versions. +See https://github.com/pytorch/pytorch/issues/174669 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/distributed/checkpoint/test_async_process_executor.py b/test/distributed/checkpoint/test_async_process_executor.py +index 9dc7095b0d6..36e639803b2 100644 +--- a/test/distributed/checkpoint/test_async_process_executor.py ++++ b/test/distributed/checkpoint/test_async_process_executor.py +@@ -1,6 +1,7 @@ + # Owner(s): ["oncall: distributed checkpointing"] + + import sys ++import unittest + from unittest.mock import patch + + import torch +@@ -100,6 +101,7 @@ class TestStorageWriter(StorageWriter): + class TestAsyncProcessExecutor(DTensorTestBase): + """Test suite for async checkpoint process executor error handling using public APIs.""" + ++ @unittest.skipIf(sys.version_info >= (3, 13), "Can't pickle tracebacks") + @with_comms + def test_checkpoint_save_failure_continues_serving(self) -> None: + """Test that checkpoint save failure doesn't exit process, continues serving.""" diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch new file mode 100644 index 000000000000..3c5dd5523dc5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch @@ -0,0 +1,12 @@ +diff --git a/test/distributed/tensor/test_dtensor_ops.py b/test/distributed/tensor/test_dtensor_ops.py +index 8c650f6b0ce..04cfa7d4cc2 100644 +--- a/test/distributed/tensor/test_dtensor_ops.py ++++ b/test/distributed/tensor/test_dtensor_ops.py +@@ -463,6 +463,7 @@ dtensor_fails = { + skip("nn.functional.feature_alpha_dropout", "without_train"), + skip("nn.functional.hinge_embedding_loss"), + skip("nn.functional.cosine_embedding_loss"), ++ skip("nn.functional.multi_head_attention_forward"), # randomness + skip("fft.hfft"), + skip("fft.hfft2"), + skip("fft.hfft2"), From 03217d9ce1fa49ba62be3ce7e11a2e1de95e9662 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Feb 2026 11:07:09 +0100 Subject: [PATCH 22/30] Add PyTorch-2.6.0_fix-server-in-test_control_plane --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 06569e192f03..7379ad747b9e 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -33,6 +33,7 @@ patches = [ 'PyTorch-1.7.0_disable-dev-shm-test.patch', 'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', 'PyTorch-2.1.0_remove-test-requiring-online-access.patch', + 'PyTorch-2.6.0_fix-server-in-test_control_plane.patch', 'PyTorch-2.6.0_show-test-duration.patch', 'PyTorch-2.6.0_skip-test_segfault.patch', 'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch', @@ -94,6 +95,8 @@ checksums = [ '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, {'PyTorch-2.1.0_remove-test-requiring-online-access.patch': '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, + {'PyTorch-2.6.0_fix-server-in-test_control_plane.patch': + '1337689ff28ecaa8d1d0edf60d322bcdd7846fec040925325d357b19eb6e4342'}, {'PyTorch-2.6.0_show-test-duration.patch': '5508f2f9619204d9f3c356dbd4000a00d58f452ab2d64ae920eb8bc8b5484d75'}, {'PyTorch-2.6.0_skip-test_segfault.patch': '26806bd62e6b61b56ebaa52d68ca44c415a28124f684bd2fb373557ada68ef52'}, {'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch': From db7aefc04c480f8887cd61d5dfc91d06a3ad34f3 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 3 Mar 2026 16:29:53 +0100 Subject: [PATCH 23/30] Fix race condition in checking for disabled tests --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 +- .../PyTorch-2.9.1_disable-slow-tests.patch | 24 +++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 7379ad747b9e..ee2f5307c06d 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -152,7 +152,7 @@ checksums = [ '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, {'PyTorch-2.9.1_check-device-avail-test_schedule.patch': '64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'}, - {'PyTorch-2.9.1_disable-slow-tests.patch': '76e6d8f7366b91a0ddc65f73685f2b09988bb5537d10d294f9bb6a48c7fec3d0'}, + {'PyTorch-2.9.1_disable-slow-tests.patch': '6b365a3531b0ac5446b5f0e8ab924b5e5742cd0331e6d9ec979118a3ef0ffc09'}, {'PyTorch-2.9.1_fix-hypothesis-deadline.patch': 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch index 8f6d6e0c7677..9db987094fff 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_disable-slow-tests.patch @@ -5,10 +5,14 @@ Enable the default for non-CI environments to cut down testing time. Don't check for SANDCASTLE when determining whether to skip disabled tests. However, the disabled-tests JSON file needs to be downloaded from S3 and placed at "tests/.pytorch-disabled-tests.json". +This file may be modified and/or redownloaded in import_test_stats.py +Disable this by just returning it's content as-if it is always up to date. +If it doesn't exist the failure will be handled by the calling function. +This modification removes the PR number field, so make it optional in the tuple expansion to allow either format. + Author: Alexander Grund (TU Dresden) diff --git a/test/run_test.py b/test/run_test.py -index 44a15d4ab2c..269d4206f3e 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -531,7 +531,7 @@ def run_test( @@ -20,8 +24,19 @@ index 44a15d4ab2c..269d4206f3e 100755 ci_args = ["--import-slow-tests", "--import-disabled-tests"] if RERUN_DISABLED_TESTS: ci_args.append("--rerun-disabled-tests") +diff --git a/tools/stats/import_test_stats.py b/tools/stats/import_test_stats.py +--- a/tools/stats/import_test_stats.py ++++ b/tools/stats/import_test_stats.py +@@ -47,6 +47,8 @@ def fetch_and_cache( + Path(dirpath).mkdir(exist_ok=True) + + path = os.path.join(dirpath, name) ++ with open(path) as f: ++ return cast(dict[str, Any], json.load(f)) + print(f"Downloading {url} to {path}") + + def is_cached_file_valid() -> bool: diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py -index bfc568bc146..7ef37cccccb 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -2722,11 +2722,11 @@ def check_if_enable(test: unittest.TestCase): @@ -32,9 +47,10 @@ index bfc568bc146..7ef37cccccb 100644 + if True: should_skip = False skip_msg = "" - +- - for disabled_test, (issue_url, platforms) in disabled_tests_dict.items(): -+ for disabled_test, (pr_num, issue_url, platforms) in disabled_tests_dict.items(): ++ # Allow for a potentially existing PR number ++ for disabled_test, (*pr_num, issue_url, platforms) in disabled_tests_dict.items(): if matches_test(disabled_test): platform_to_conditional: dict = { "mac": IS_MACOS, From 9fe965ec5cc5a2e6c584caecbbe4f04a7841070b Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 4 Mar 2026 15:27:44 +0100 Subject: [PATCH 24/30] Remove pytest-shard --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 1 - 1 file changed, 1 deletion(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index ee2f5307c06d..039f2543278e 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -192,7 +192,6 @@ builddependencies = [ ('parameterized', '0.9.0'), ('pytest-flakefinder', '1.1.0'), ('pytest-rerunfailures', '16.1'), - ('pytest-shard', '0.1.2'), ('pytest-subtests', '0.15.0'), ('tlparse', '0.4.3'), ('optree', '0.18.0'), From c931bb736f6d3b8553b7dea408aba1280c28761c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 5 Mar 2026 16:26:22 +0100 Subject: [PATCH 25/30] Add more patches --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 8 ++ .../PyTorch-2.9.1_dont-print-test-items.patch | 24 ++++ ....9.1_fix-DDPCommHookType-python-3.13.patch | 120 ++++++++++++++++++ ...yTorch-2.9.1_skip-cutlass-addmm-test.patch | 16 +++ 4 files changed, 168 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_dont-print-test-items.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cutlass-addmm-test.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 039f2543278e..b5f59f393d67 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -67,6 +67,8 @@ patches = [ 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', 'PyTorch-2.9.1_check-device-avail-test_schedule.patch', 'PyTorch-2.9.1_disable-slow-tests.patch', + 'PyTorch-2.9.1_dont-print-test-items.patch', + 'PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch', 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', 'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch', @@ -77,6 +79,7 @@ patches = [ 'PyTorch-2.9.1_normalize_tree_output.patch', 'PyTorch-2.9.1_set-test-timeout.patch', 'PyTorch-2.9.1_skip-bool-bessel-tests.patch', + 'PyTorch-2.9.1_skip-cutlass-addmm-test.patch', 'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch', 'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch', 'PyTorch-2.9.1_skip-test_checkpoint_save_failure_continues_serving.patch', @@ -153,6 +156,9 @@ checksums = [ {'PyTorch-2.9.1_check-device-avail-test_schedule.patch': '64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'}, {'PyTorch-2.9.1_disable-slow-tests.patch': '6b365a3531b0ac5446b5f0e8ab924b5e5742cd0331e6d9ec979118a3ef0ffc09'}, + {'PyTorch-2.9.1_dont-print-test-items.patch': '2b524cf3d557c0672feefc3a7165e5555e549b0720647a84d546f769cea0be07'}, + {'PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch': + 'd7bafe8340bba9dd909475fc62b739b0ce3f95d3409479ef8c5929351dd2a05d'}, {'PyTorch-2.9.1_fix-hypothesis-deadline.patch': 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': @@ -169,6 +175,8 @@ checksums = [ {'PyTorch-2.9.1_normalize_tree_output.patch': '7d5994580339b73c28de595d9e5a0448db97b7d284f17efd18909e4613d170df'}, {'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'}, {'PyTorch-2.9.1_skip-bool-bessel-tests.patch': '9c07cddaf4c1b17fe9a54874f10b8edbfb85785c40ac1e3aea11c7f4b5abca69'}, + {'PyTorch-2.9.1_skip-cutlass-addmm-test.patch': + '1f81a8a9eea8eda51fc93dff84cd994772febf4fd05d77efbf21b8440dadfd4e'}, {'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch': 'e544f765beac7bdb3fc0ada98a3f92fd7e511ed8874de085aa2f213cca769d40'}, {'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_dont-print-test-items.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_dont-print-test-items.patch new file mode 100644 index 000000000000..b029f0a8a5a8 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_dont-print-test-items.patch @@ -0,0 +1,24 @@ +Reduce verbosity of the test output by not showing all ~270k test names. + +Author: Alexander Grund (TU Dresden) +diff --git a/test/run_test.py b/test/run_test.py +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -623,7 +623,7 @@ def run_test( + test_file, + ) + else: +- command.extend([f"--sc={stepcurrent_key}", "--print-items"]) ++ command.extend([f"--sc={stepcurrent_key}"]) + ret_code, was_rerun = retry_shell( + command, + test_directory, +@@ -725,7 +725,7 @@ def run_test_retries( + + num_failures = defaultdict(int) + +- print_items = ["--print-items"] ++ print_items = [] + sc_command = f"--sc={stepcurrent_key}" + while True: + ret_code, _ = retry_shell( diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch new file mode 100644 index 000000000000..85bc2949aa95 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch @@ -0,0 +1,120 @@ +Test on Python 3.13 fails with +> AttributeError: 'functools.partial' object has no attribute 'value' + +Fix using https://github.com/pytorch/pytorch/pull/163939 + +diff --git a/torch/distributed/algorithms/ddp_comm_hooks/__init__.py b/torch/distributed/algorithms/ddp_comm_hooks/__init__.py +--- a/torch/distributed/algorithms/ddp_comm_hooks/__init__.py ++++ b/torch/distributed/algorithms/ddp_comm_hooks/__init__.py +@@ -1,7 +1,21 @@ + # mypy: allow-untyped-defs ++import sys + from enum import Enum + from functools import partial + ++ ++# To suppress FutureWarning from partial since 3.13 ++if sys.version_info >= (3, 13): ++ from enum import member ++ ++ def _enum_member(x): ++ return member(x) ++else: ++ ++ def _enum_member(x): ++ return x ++ ++ + import torch.distributed as dist + + from . import ( +@@ -51,45 +65,61 @@ class DDPCommHookType(Enum): + ``DDPCommHookType.ALLREDUCE.value(model=model, state=process_group)``. + """ + +- ALLREDUCE = partial(_ddp_comm_hook_wrapper, comm_hook=default.allreduce_hook) +- FP16_COMPRESS = partial( +- _ddp_comm_hook_wrapper, comm_hook=default.fp16_compress_hook ++ ALLREDUCE = _enum_member( ++ partial(_ddp_comm_hook_wrapper, comm_hook=default.allreduce_hook) ++ ) ++ FP16_COMPRESS = _enum_member( ++ partial(_ddp_comm_hook_wrapper, comm_hook=default.fp16_compress_hook) + ) +- BF16_COMPRESS = partial( +- _ddp_comm_hook_wrapper, comm_hook=default.bf16_compress_hook ++ BF16_COMPRESS = _enum_member( ++ partial(_ddp_comm_hook_wrapper, comm_hook=default.bf16_compress_hook) + ) +- QUANTIZE_PER_TENSOR = partial( +- _ddp_comm_hook_wrapper, comm_hook=quantization.quantization_pertensor_hook ++ QUANTIZE_PER_TENSOR = _enum_member( ++ partial( ++ _ddp_comm_hook_wrapper, comm_hook=quantization.quantization_pertensor_hook ++ ) + ) +- QUANTIZE_PER_CHANNEL = partial( +- _ddp_comm_hook_wrapper, comm_hook=quantization.quantization_perchannel_hook ++ QUANTIZE_PER_CHANNEL = _enum_member( ++ partial( ++ _ddp_comm_hook_wrapper, comm_hook=quantization.quantization_perchannel_hook ++ ) + ) +- POWER_SGD = partial( +- _powerSGD_comm_hook_wrapper, +- comm_hook=powerSGD.powerSGD_hook, +- matrix_approximation_rank=1, ++ POWER_SGD = _enum_member( ++ partial( ++ _powerSGD_comm_hook_wrapper, ++ comm_hook=powerSGD.powerSGD_hook, ++ matrix_approximation_rank=1, ++ ) + ) + # Rank-2 PowerSGD can give a higher accuracy than the default rank-1 version, + # but it runs slower and consumes more memory. +- POWER_SGD_RANK2 = partial( +- _powerSGD_comm_hook_wrapper, +- comm_hook=powerSGD.powerSGD_hook, +- matrix_approximation_rank=2, ++ POWER_SGD_RANK2 = _enum_member( ++ partial( ++ _powerSGD_comm_hook_wrapper, ++ comm_hook=powerSGD.powerSGD_hook, ++ matrix_approximation_rank=2, ++ ) + ) + # Batching can lead to a faster training at the cost of accuracy. +- BATCHED_POWER_SGD = partial( +- _powerSGD_comm_hook_wrapper, +- comm_hook=powerSGD.batched_powerSGD_hook, +- matrix_approximation_rank=1, ++ BATCHED_POWER_SGD = _enum_member( ++ partial( ++ _powerSGD_comm_hook_wrapper, ++ comm_hook=powerSGD.batched_powerSGD_hook, ++ matrix_approximation_rank=1, ++ ) + ) +- BATCHED_POWER_SGD_RANK2 = partial( +- _powerSGD_comm_hook_wrapper, +- comm_hook=powerSGD.batched_powerSGD_hook, +- matrix_approximation_rank=2, ++ BATCHED_POWER_SGD_RANK2 = _enum_member( ++ partial( ++ _powerSGD_comm_hook_wrapper, ++ comm_hook=powerSGD.batched_powerSGD_hook, ++ matrix_approximation_rank=2, ++ ) + ) +- NOOP = partial( +- _ddp_comm_hook_wrapper, +- comm_hook=debugging.noop_hook, ++ NOOP = _enum_member( ++ partial( ++ _ddp_comm_hook_wrapper, ++ comm_hook=debugging.noop_hook, ++ ) + ) + + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cutlass-addmm-test.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cutlass-addmm-test.patch new file mode 100644 index 000000000000..aa7e88a859dc --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cutlass-addmm-test.patch @@ -0,0 +1,16 @@ +The test fails with accuracy issues in at least H100, possibly on CUDA 12.8 in general. +See https://github.com/pytorch/pytorch/pull/156626 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/inductor/test_cutlass_backend.py b/test/inductor/test_cutlass_backend.py +--- a/test/inductor/test_cutlass_backend.py ++++ b/test/inductor/test_cutlass_backend.py +@@ -613,7 +613,7 @@ class TestCutlassBackend(TestCase): + + torch.testing.assert_close(actual, expected, rtol=1e-2, atol=0.05) + +- @unittest.skipIf(not SM90OrLater, "need sm_90") ++ @unittest.skip("Fails on CUDA 12.8+") + @parametrize("dynamic", (False, True)) + @parametrize("use_aoti", (False, True)) + @parametrize("dtype", (torch.float16, torch.bfloat16)) From c30732ff0dac84c8011b256ff6904eb99f53c589 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 11 Mar 2026 12:39:51 +0100 Subject: [PATCH 26/30] Fix using wrong OpenMP library --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 ++ ...Torch-2.9.1_avoid-using-wrong-libomp.patch | 34 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-using-wrong-libomp.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index b5f59f393d67..b715adb4d5b1 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -65,6 +65,7 @@ patches = [ 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', + 'PyTorch-2.9.1_avoid-using-wrong-libomp.patch', 'PyTorch-2.9.1_check-device-avail-test_schedule.patch', 'PyTorch-2.9.1_disable-slow-tests.patch', 'PyTorch-2.9.1_dont-print-test-items.patch', @@ -153,6 +154,8 @@ checksums = [ '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, + {'PyTorch-2.9.1_avoid-using-wrong-libomp.patch': + '2fc2bb82cce87ba0ce73718b0502735ecdf360ca6bfac4482396f7f1c51c1866'}, {'PyTorch-2.9.1_check-device-avail-test_schedule.patch': '64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'}, {'PyTorch-2.9.1_disable-slow-tests.patch': '6b365a3531b0ac5446b5f0e8ab924b5e5742cd0331e6d9ec979118a3ef0ffc09'}, diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-using-wrong-libomp.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-using-wrong-libomp.patch new file mode 100644 index 000000000000..a9b58a7d8e5c --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_avoid-using-wrong-libomp.patch @@ -0,0 +1,34 @@ +When using GCC `libgomp.so` should be used which will be automatically done with `-fopenmp`. +However the custom FindOpenMP searches for `libomp.so` first which ends up being used if found +e.g. on the system in /lib64 + +See https://github.com/pytorch/pytorch/pull/177126 + +Author: Alexander Grund (TU Dresden) + +diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake +--- a/cmake/Modules/FindOpenMP.cmake ++++ b/cmake/Modules/FindOpenMP.cmake +@@ -289,21 +289,13 @@ function(_OPENMP_GET_FLAGS LANG FLAG_MODE OPENMP_FLAG_VAR OPENMP_LIB_NAMES_VAR) + mark_as_advanced(OpenMP_libomp_LIBRARY) + endif() + +- if (NOT OpenMP_libomp_LIBRARY) +- find_library(OpenMP_libomp_LIBRARY +- NAMES omp gomp iomp5 +- HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES} +- DOC "libomp location for OpenMP" +- ) +- mark_as_advanced(OpenMP_libomp_LIBRARY) +- endif() +- + # Use OpenMP_PREFIX if defined + if (NOT OpenMP_libomp_LIBRARY AND NOT "${OpenMP_PREFIX}" STREQUAL "") + find_library(OpenMP_libomp_LIBRARY + NAMES omp gomp iomp5 + HINTS "${OpenMP_PREFIX}/lib" + DOC "libomp location for OpenMP" ++ NO_DEFAULT_PATH + ) + mark_as_advanced(OpenMP_libomp_LIBRARY) + endif() From 3b6de18ae1969e6d23ca0b4e7a167811afc6da9f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 11 Mar 2026 16:29:45 +0100 Subject: [PATCH 27/30] Skip segfaulting flex_attention suite --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index b715adb4d5b1..e453b7ec39fd 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -270,6 +270,9 @@ excluded_tests = { 'dynamo/test_utils', # Packaging test only, not important for us 'test_license', + # Occasional segfaults on CPU + 'inductor/test_flex_attention', + 'inductor/test_flex_decoding ', ] } From c2dde094b8b4a6ace7f7d6033f958672b71a5194 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Mar 2026 17:15:21 +0100 Subject: [PATCH 28/30] Skip some tests failing on ARM --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 15 ++++ ...increase-tolerance-TestDecomp-matmul.patch | 32 +++++++++ ..._skip-cpu_repro-tests-failing-on-ARM.patch | 72 +++++++++++++++++++ ....1_skip-svd-pca-lowrank-tests-on-cpu.patch | 26 +++++++ ...skip-test_optree_graph_break_message.patch | 24 +++++++ ...ch-2.9.1_skip-tests-requiring-MKLDNN.patch | 38 ++++++++++ 6 files changed, 207 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_optree_graph_break_message.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index e453b7ec39fd..665358d5cd94 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -87,6 +87,11 @@ patches = [ 'PyTorch-2.9.1_skip-test_norm_matrix_degenerate_shapes.patch', 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', + 'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch', + 'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch', + 'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch', + 'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch', + 'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch', ] checksums = [ {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, @@ -192,6 +197,16 @@ checksums = [ '7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'}, {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, + {'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch': + 'dd82203ce3b6262255aba6b59fb3b547c4c17875d5711f6d3d489aa8f0f59f32'}, + {'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch': + '99055fde02ca17c1db1cd72f41821387a50901d6cd947161cafa12257b3a1c5a'}, + {'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch': + '4fc772293047dc737b99e232b8a8db904aa8e88e3c8b2bcc3602fb723941fb89'}, + {'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch': + '2ef1ad424d5f12a4d0ae06938da623819596cee7c0fb4616008f27583c29494d'}, + {'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch': + '03756a8069bad01018f422f41aa24c7c543519fd857db88a0c6de661976c8859'}, ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch new file mode 100644 index 000000000000..9bd54ea4d8da --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch @@ -0,0 +1,32 @@ + +TestDecompCPU.test_comprehensive___rmatmul___cpu_float32, TestDecompCPU.test_comprehensive_matmul_cpu_float32 fail with small tolerance issues: +> Expected 12.534862518310547 but got 12.534895896911621. +> Absolute difference: 3.337860107421875e-05 (up to 1e-05 allowed) +> Relative difference: 2.6628613616990456e-06 (up to 1.3e-06 allowed) + +Increase the tolerances slightly to make them pass. + +Author: Alexander Grund (TU Dresden) + +diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py +--- a/torch/testing/_internal/common_methods_invocations.py ++++ b/torch/testing/_internal/common_methods_invocations.py +@@ -14286,6 +14286,9 @@ op_db: list[OpInfo] = [ + DecorateInfo(toleranceOverride({torch.float32: tol(atol=0, rtol=1e-5)}), + 'TestCommon', 'test_noncontiguous_samples', + device_type='cpu'), ++ DecorateInfo(toleranceOverride({torch.float32: tol(atol=4e-5, rtol=3e-6)}), ++ "TestDecomp", "test_comprehensive", device_type="cpu", ++ ), + DecorateInfo( + toleranceOverride({ + torch.float32: tol(atol=1e-5, rtol=1e-5), +@@ -17690,6 +17693,8 @@ op_db: list[OpInfo] = [ + 'TestMathBits', 'test_conj_view'), + DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-05, rtol=1.2e-03)}), + 'TestCommon', 'test_noncontiguous_samples'), ++ DecorateInfo(toleranceOverride({torch.float32: tol(atol=4e-05, rtol=3e-06)}), ++ "TestDecomp", "test_comprehensive", device_type="cpu"), + DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1e-05)}), + "TestDecomp", "test_comprehensive", device_type="cuda", + active_if=TEST_WITH_ROCM), diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch new file mode 100644 index 000000000000..ca205deb257e --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch @@ -0,0 +1,72 @@ +Those tests fail with precision issues on ARM which seems to be known: +https://github.com/pytorch/pytorch/pull/171095 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_cpu_repro.py b/test/inductor/test_cpu_repro.py +--- a/test/inductor/test_cpu_repro.py ++++ b/test/inductor/test_cpu_repro.py +@@ -31,6 +31,7 @@ from torch.fx.experimental.proxy_tensor import make_fx + from torch.nn import functional as F + from torch.testing._internal.common_utils import ( + instantiate_parametrized_tests, ++ IS_ARM64, + IS_FBCODE, + IS_MACOS, + parametrize, +@@ -3245,6 +3246,7 @@ class CPUReproTests(TestCase): + 3, + ) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + @config.patch({"fx_graph_cache": False, "fx_graph_remote_cache": False}) + def test_two_local_buffers_in_outer_loop_fusion(self): + def fn(x): +@@ -3568,6 +3570,7 @@ class CPUReproTests(TestCase): + self.common(m, (x,)) + check_metrics_vec_kernel_count(6) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + @requires_vectorization + @config.patch("cpp.enable_tiling_heuristics", False) + def test_transpose_copy(self): +@@ -3812,6 +3815,7 @@ class CPUReproTests(TestCase): + self.common(fn, (x, y)) + check_metrics_vec_kernel_count(2) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + def test_transpose_mxn_16_16_bf16_fp16(self): + def fn(a, b): + c = a * b +@@ -3885,6 +3889,7 @@ class CPUReproTests(TestCase): + x = torch.rand(4, 5) + self.common(f, (x,)) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + def test_broadcast_scalar_cpp_tile_2d_kernel(self): + # Based on detectron2_maskrcnn backbone (conv2d -> max_pool2d) + s0 = 12 +@@ -4384,6 +4389,7 @@ class CPUReproTests(TestCase): + y = torch.randint(0, 255, (3, 3), dtype=torch.uint8) + self.common(fn, (x, y)) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + def test_float32_to_uint8(self): + # https://github.com/pytorch/pytorch/issues/156788 + @torch.compile +@@ -4868,6 +4874,7 @@ class CPUReproTests(TestCase): + x = torch.randn(1, 4, 2, 2) + self.common(fn, (x,)) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + @parametrize("is_inference", (True, False)) + def test_disabled_amp(self, is_inference): + class M(torch.nn.Module): +@@ -5367,6 +5374,7 @@ class CPUReproTests(TestCase): + code + ) + ++ @unittest.skipIf(IS_ARM64, "Fails on ARM") + @config.patch(freezing=True) + def test_add_layernorm(self): + class Model(torch.nn.Module): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch new file mode 100644 index 000000000000..f0934960ac62 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch @@ -0,0 +1,26 @@ +On ARM those tests fail with +> torch._C._LinAlgError: linalg.svd: The algorithm failed to converge because the input matrix contained non-finite values. + +Traced to OpenBLAS with a fix in OpenBLAS 0.3.30, see https://github.com/pytorch/pytorch/issues/142131 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/test_linalg.py b/test/test_linalg.py +--- a/test/test_linalg.py ++++ b/test/test_linalg.py +@@ -2674,6 +2674,7 @@ class TestLinalg(TestCase): + self.assertRaisesRegex(RuntimeError, "must be different", torch.norm, x, "nuc", (0, 0)) + self.assertRaisesRegex(IndexError, "Dimension out of range", torch.norm, x, "nuc", (0, 2)) + ++ @onlyCUDA + @skipCUDAIfNoCusolver + @skipCPUIfNoLapack + @dtypes(torch.double, torch.cdouble) +@@ -9383,6 +9384,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2: + + run_test((1, 1), (1, 1, 1025)) + ++ @onlyCUDA + @skipCUDAIfNoCusolver + @skipCPUIfNoLapack + def test_pca_lowrank(self, device): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_optree_graph_break_message.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_optree_graph_break_message.patch new file mode 100644 index 000000000000..5eec8929e5db --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-test_optree_graph_break_message.patch @@ -0,0 +1,24 @@ +Test fails with output mismatch: +> - Explanation: Dynamo cannot trace optree C/C++ function optree._C.PyCapsule.flatten. +> + Explanation: Dynamo cannot trace optree C/C++ function optree._C.pybind11_detail_function_record_v1_system_libstdcpp_gxx_abi_1xxx_use_cxx11_abi_1.flatten. +> Hint: Consider using torch.utils._pytree - https://github.com/pytorch/pytorch/blob/main/torch/utils/_pytree.py +> +> - Developer debug context: module: optree._C, qualname: PyCapsule.flatten, skip reason: +> + Developer debug context: module: optree._C, qualname: pybind11_detail_function_record_v1_system_libstdcpp_gxx_abi_1xxx_use_cxx11_abi_1.flatten, skip reason: + +Seems to be related to pybind11 version, GCC version, ... + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/dynamo/test_error_messages.py b/test/dynamo/test_error_messages.py +--- a/test/dynamo/test_error_messages.py ++++ b/test/dynamo/test_error_messages.py +@@ -461,7 +461,7 @@ from user code: + warnings.warn("test")""", + ) + +- @unittest.skipIf(not python_pytree._cxx_pytree_exists, "missing optree package") ++ @unittest.skip("Failes depending on Pybind11/GCC versions") + def test_optree_graph_break_message(self): + import optree + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch new file mode 100644 index 000000000000..65cc3882ef63 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch @@ -0,0 +1,38 @@ +test_int8_woq_mm fail without MKLDDN at +> self.assertEqual(counters["inductor"]["cpp_templated_kernel_counter"], 1) + +See https://github.com/pytorch/pytorch/pull/177387 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/test_cpu_select_algorithm.py b/test/inductor/test_cpu_select_algorithm.py +--- a/test/inductor/test_cpu_select_algorithm.py ++++ b/test/inductor/test_cpu_select_algorithm.py +@@ -50,6 +50,11 @@ run_and_get_cpp_code = test_torchinductor.run_and_get_cpp_code + + aten = torch.ops.aten + ++skipIfNoMkldnn = unittest.skipIf( ++ not (torch.backends.mkldnn.enabled and torch.backends.mkldnn.is_available()), ++ "no MKLDNN", ++) ++ + + def patches(fn): + def skip_cache(self, choices, name, key, benchmark, hint_override=None): +@@ -1374,6 +1379,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + @inductor_config.patch({"freezing": True}) + @patches + @torch.no_grad ++ @skipIfNoMkldnn + @dtypes(torch.bfloat16) + @parametrize( + "batch_size", +@@ -1437,6 +1443,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + @inductor_config.patch({"freezing": True, "cpp.enable_concat_linear": True}) + @patches + @torch.no_grad ++ @skipIfNoMkldnn + @dtypes(torch.bfloat16) + @parametrize( + "batch_size", From 92cb18deebd88fbbd5752559b54942b4de489f9e Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 20 Mar 2026 11:18:03 +0100 Subject: [PATCH 29/30] Fix Python 3.13 compat --- .../PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 3 +++ ....9.1_fix-pickle-error-on-Python-3.13.patch | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 665358d5cd94..72bb3abaa247 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -72,6 +72,7 @@ patches = [ 'PyTorch-2.9.1_fix-DDPCommHookType-python-3.13.patch', 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', + 'PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch', 'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch', 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', 'PyTorch-2.9.1_fix-test_recursion_in_except_handler.patch', @@ -171,6 +172,8 @@ checksums = [ 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, + {'PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch': + '88807b5564485968de3be6411d33c257c5ce59f5d3db23c7aeba884458102d57'}, {'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch': 'bdddf5a9ba47d57ec96f4bbefc3b85c4904e44de93dc5c7a65bc03e343035ae9'}, {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch new file mode 100644 index 000000000000..7656cd8c5d5f --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1_fix-pickle-error-on-Python-3.13.patch @@ -0,0 +1,23 @@ +Avoid "cannot pickle code objects" on Python 3.13+ + +Extracted from https://github.com/pytorch/pytorch/pull/177713 +diff --git a/torch/distributed/checkpoint/api.py b/torch/distributed/checkpoint/api.py +--- a/torch/distributed/checkpoint/api.py ++++ b/torch/distributed/checkpoint/api.py +@@ -8,7 +8,15 @@ __all__ = ["CheckpointException"] + + + def _wrap_exception(exc: BaseException) -> WRAPPED_EXCEPTION: +- return (exc, tb.extract_tb(exc.__traceback__)) ++ summary = tb.extract_tb(exc.__traceback__) ++ # Python 3.13+ stores bytecode objects in FrameSummary._code, ++ # which cannot be pickled. Clear them so gather_object succeeds ++ # and the real exception is reported instead of a misleading ++ # "cannot pickle code objects" TypeError. ++ for frame in summary: ++ if hasattr(frame, "_code"): ++ object.__setattr__(frame, "_code", None) ++ return (exc, summary) + + + def _is_wrapped_exception(obj: Any) -> bool: From e83332d2d2755bf68e90e829756f2fbf17ae67c0 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 16 Apr 2026 18:04:01 +0200 Subject: [PATCH 30/30] Disable sanity_check_pip_list --- .../p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb index 72bb3abaa247..e38d4e84ff16 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2025b-CUDA-12.9.1.eb @@ -314,4 +314,6 @@ modextrapaths = {'TORCHINDUCTOR_CUTLASS_DIR': 'extra/cutlass'} tests = ['PyTorch-check-cpp-extension.py', 'PyTorch-check-cutlass.py'] +sanity_check_pip_list = False + moduleclass = 'ai'