diff --git a/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb b/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb new file mode 100644 index 000000000000..8b028268db66 --- /dev/null +++ b/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb @@ -0,0 +1,26 @@ +easyblock = 'PythonPackage' + +name = 'PuLP' +version = '2.9.0' + +homepage = 'https://github.com/coin-or/pulp' +description = """ +PuLP is an LP modeler written in Python. PuLP can generate MPS or LP files and +call GLPK, COIN-OR CLP/CBC, CPLEX, GUROBI, MOSEK, XPRESS, CHOCO, MIPCL, SCIP to +solve linear problems. +""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +sources = [SOURCELOWER_TAR_GZ] +checksums = ['2e30e6c0ef2c0edac185220e3e53faca62eb786a9bd68465208f05bc63e850f3'] + +dependencies = [ + ('Python', '3.11.5'), + ('GLPK', '5.0'), + ('Cbc', '2.10.11'), + # Gurobi requires a separate license + # ('Gurobi', '9.5.0'), +] + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb new file mode 100644 index 000000000000..c0b30959612f --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb @@ -0,0 +1,214 @@ +name = 'PyTorch' +version = '2.6.0' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] +patches = [ + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-1.12.1_add-hypothesis-suppression.patch', + 'PyTorch-1.12.1_fix-TestTorch.test_to.patch', + 'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch', + 'PyTorch-1.13.1_skip-failing-singular-grad-test.patch', + 'PyTorch-2.0.1_avoid-test_quantization-failures.patch', + 'PyTorch-2.0.1_skip-failing-gradtest.patch', + 'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch', + 'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', + 'PyTorch-2.1.0_remove-test-requiring-online-access.patch', + 'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch', + 'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch', + 'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch', + 'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch', + 'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch', + 'PyTorch-2.6.0_add-checkfunctionexists-include.patch', + 'PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch', + 'PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch', + 'PyTorch-2.6.0_disable_tests_which_need_network_download.patch', + 'PyTorch-2.6.0_disable-gcc12-warnings.patch', + 'PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch', + 'PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch', + 'PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch', + 'PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch', + 'PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch', + 'PyTorch-2.6.0_fix-inductor-device-interface.patch', + 'PyTorch-2.6.0_fix-server-in-test_control_plane.patch', + 'PyTorch-2.6.0_fix-skip-decorators.patch', + 'PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch', + 'PyTorch-2.6.0_fix-test_linear_with_embedding.patch', + 'PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch', + 'PyTorch-2.6.0_fix-test_public_bindings.patch', +
'PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch', + 'PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch', + 'PyTorch-2.6.0_fix-xnnpack-float16-convert.patch', + 'PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch', + 'PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch', + 'PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch', + 'PyTorch-2.6.0_remove-test_slice_with_floordiv.patch', + 'PyTorch-2.6.0_skip-diff-test-on-ppc.patch', + 'PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch', + 'PyTorch-2.6.0_skip-test_init_from_local_shards.patch', + 'PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch', + 'PyTorch-2.6.0_skip-test-requiring-MKL.patch', + 'PyTorch-2.6.0_skip-test_segfault.patch', + 'PyTorch-2.6.0_skip-tests-without-fbgemm.patch', +] +checksums = [ + {'pytorch-v2.6.0.tar.gz': '3005690eb7b083c443a38c7657938af63902f524ad87a6c83f1aca38c77e3b57'}, + {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, + {'PyTorch-1.12.1_add-hypothesis-suppression.patch': + 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, + {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'}, + {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch': + '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'}, + {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch': + '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'}, + {'PyTorch-2.0.1_avoid-test_quantization-failures.patch': + '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'}, + {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'}, + {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch': + '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'}, + {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch': + '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, + {'PyTorch-2.1.0_remove-test-requiring-online-access.patch': + '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, + {'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch': + '6298daf9ddaa8542850eee9ea005f28594ab65b1f87af43d8aeca1579a8c4354'}, + {'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch': + 'fb96eefabf394617bbb3fbd3a7a7c1aa5991b3836edc2e5d2a30e708bfe49ba1'}, + {'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch': + '23416f2d9d5226695ec3fbea0671e3650c655c19deefd3f0f8ddab5afa50f485'}, + {'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch': + 'ee07d21c3ac7aeb0bd0e39507b18a417b9125284a529102929c4b5c6727c2976'}, + {'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch': + '9703fd0f1fca8916f6d79d83e9a7efe8e3f717362a5fdaa8f5d9da90d0c75018'}, + {'PyTorch-2.6.0_add-checkfunctionexists-include.patch': + '93579e35e946fb06025a50c42f3625ed8b8ac9f503a963cc23767e2c8869f0ea'}, + {'PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch': + '88d03d90359bc1fe3cfa3562624d4fbfd4c6654c9199c556ca912ac55289ce55'}, + {'PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch': + '74db866787f1e666ed3b35db5204f05a0ba8d989fb23057a72dd07928388dc46'}, + {'PyTorch-2.6.0_disable_tests_which_need_network_download.patch': + 'fe76129811e4eb24d0e12c397335a4c7971b0c4e48ce9cdb9169f3ef9de7aac4'}, + {'PyTorch-2.6.0_disable-gcc12-warnings.patch': '892643650788b743106ebe4e70c68be42a756eba797f0f79e31708d6e008a620'}, + 
{'PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch': + 'a6b1cfe8f03ad5b17437e04e6a0369a25fcc79eed939ce6912ceca1c0ab0f444'}, + {'PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch': + '011cffc098b6818eb160b6bec2e671dec46cb2a8457ce32144ea01cc9ed4290a'}, + {'PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch': + 'fd918fa510bf04c95f3bcc2f4abea417632a0fefb278154ec95207ca0d1719ed'}, + {'PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch': + '445472d43a61523b2ed169023f5f6db197bc2df8408f59e6254e55f5cb1d3a11'}, + {'PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch': + '79cf77a795e06c4c3206a998ce8f4a92072f79736803008ede65e5ec2f204bfc'}, + {'PyTorch-2.6.0_fix-inductor-device-interface.patch': + 'e8e6af1ea5f01568c23127d4f83aacb482ec9005ba558b68763748a581bcc5bc'}, + {'PyTorch-2.6.0_fix-server-in-test_control_plane.patch': + '1337689ff28ecaa8d1d0edf60d322bcdd7846fec040925325d357b19eb6e4342'}, + {'PyTorch-2.6.0_fix-skip-decorators.patch': 'ec1ba1ef2a2b2c6753a0b35d10c6af0457fc90fe98e2f77979745d9f79d79c86'}, + {'PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch': + '928c4b1dc16f3d4a7bec29d8749b89ebd41488845938e2514c7fa8c048950e33'}, + {'PyTorch-2.6.0_fix-test_linear_with_embedding.patch': + '56c053de7cfaa2f9898c3b036a185b499f5d44a7b4cd0442c45a8c94928322bf'}, + {'PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch': + '8cf9e5d434eb8d3b81400622ca23714c7002a0b835e7e08b384b84408c7ed085'}, + {'PyTorch-2.6.0_fix-test_public_bindings.patch': + '066d88acd8156ed3f91b6a8e924de57f8aef944aa1bf67dc453b830ee1c26094'}, + {'PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch': + '5f5ce1e275888cd6a057a0769fffaa9e49dde003ba191fd70b0265d8c6259a9b'}, + {'PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch': + '82ce0b48e3b7c3dfd3a2ba915f4675d5c3a6d149646e1e0d6a29eedbbaecc8bd'}, + {'PyTorch-2.6.0_fix-xnnpack-float16-convert.patch': + 'a6fcb475040c6fed2c0ec8b3f9c1e9fb964220413e84c8f2ee4092770ee6ac7d'}, + {'PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch': + 'c1c6ea41504e4479d258225ecefc7e9c5726934601610904ae555501a11e9109'}, + {'PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch': + '9850facdfb5d98451249570788217ede07466cae9ba52cd03afd3ec803ba33c9'}, + {'PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch': + '8d5eb53bb0a1456af333ae646c860033d6dd037bd9152601a200ca5c10ebf3cb'}, + {'PyTorch-2.6.0_remove-test_slice_with_floordiv.patch': + '1b7ff59a595b9ebbc042d8ff53e3f6c72a1d3b04fb82228f4433473f28623f9b'}, + {'PyTorch-2.6.0_skip-diff-test-on-ppc.patch': '6f2f87cad1b0ab8c5a0c7b3f7fbc14e4bdfbe61da26a3934ded9dda7fe368c74'}, + {'PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch': + '600f74de167b6fea4d849229de6d653dc616093b456962729222d6bfa767a8e8'}, + {'PyTorch-2.6.0_skip-test_init_from_local_shards.patch': + '222383195f6a3b7c545ffeadb4dd469b9f3361b42c0866de3d3f0f91f8fbe777'}, + {'PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch': + '84abe4769005bbb794852a1b3de370977c9efa351bac4b4775831a02bb18d898'}, + {'PyTorch-2.6.0_skip-test-requiring-MKL.patch': + 'f1c9b1c77b09d59317fd52d390e7d948a147325b927ad6373c1fa1d1d6ea1ea8'}, + {'PyTorch-2.6.0_skip-test_segfault.patch': '26806bd62e6b61b56ebaa52d68ca44c415a28124f684bd2fb373557ada68ef52'}, + {'PyTorch-2.6.0_skip-tests-without-fbgemm.patch': + 'ed35099de94a14322a879066da048ec9bc565dc81287b4adc4fec46f9afe90cf'}, +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.27.6'), + ('hypothesis', '6.90.0'), + # For tests + 
('parameterized', '0.9.0'), + ('pytest-flakefinder', '1.1.0'), + ('pytest-rerunfailures', '14.0'), + ('pytest-shard', '0.1.2'), + ('pytest-subtests', '0.13.1'), + ('tlparse', '0.3.5'), + ('optree', '0.13.0'), + ('unittest-xml-reporting', '3.1.0'), +] + +dependencies = [ + ('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), + ('protobuf', '25.3'), + ('protobuf-python', '4.25.3'), + ('PuLP', '2.9.0'), + ('pybind11', '2.11.1'), + ('SciPy-bundle', '2023.11'), + ('PyYAML', '6.0.1'), + ('MPFR', '4.2.1'), + ('GMP', '6.3.0'), + ('numactl', '2.0.16'), + ('FFmpeg', '6.0'), + ('Pillow', '10.2.0'), + ('expecttest', '0.2.1'), + ('networkx', '3.2.1'), + ('typing-extensions', '4.10.0'), + ('sympy', '1.12'), + ('Z3', '4.13.0',), +] + +buildcmd = '%(python)s setup.py build' # Run the (long) build in the build step + +excluded_tests = { + '': [ + # This test seems to take too long on NVIDIA Ampere at least. + 'distributed/test_distributed_spawn', + # no xdoctest + 'doctests', + # intermittent failures on various systems + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'distributed/rpc/test_tensorpipe_agent', + # This test is expected to fail when run in their CI, but won't in our case. + # It just checks for a "CI" env variable + 'test_ci_sanity_check_fail', + ] +} + +local_test_opts = '--continue-through-error --pipe-logs --verbose %(excluded_tests)s' +runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py ' + local_test_opts + +# Especially test_quantization has a few corner cases that are triggered by the random input values, +# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030 +# So allow a low number of tests to fail as the tests "usually" succeed +max_failed_tests = 6 + +tests = ['PyTorch-check-cpp-extension.py'] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch new file mode 100644 index 000000000000..73a90f777ee5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch @@ -0,0 +1,25 @@ +From 8d91bfd9654589c41b3bbb589bcb0bf95443c53e Mon Sep 17 00:00:00 2001 +From: Nikita Shulga +Date: Tue, 28 Jan 2025 08:40:31 -0800 +Subject: [PATCH] [BE] Include CheckFunctionExists in `FindBLAS.cmake` + (#145849) + +It's used in the script, so it must be included +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145849 +Approved by: https://github.com/Skylion007 +--- + cmake/Modules/FindBLAS.cmake | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/cmake/Modules/FindBLAS.cmake b/cmake/Modules/FindBLAS.cmake +index 5ce875f529206..8e54eedb2aa8f 100644 +--- a/cmake/Modules/FindBLAS.cmake ++++ b/cmake/Modules/FindBLAS.cmake +@@ -25,6 +25,7 @@ SET(WITH_BLAS "" CACHE STRING "Blas type [accelerate/acml/atlas/blis/generic/got + # Old FindBlas + INCLUDE(CheckCSourceRuns) + INCLUDE(CheckFortranFunctionExists) ++INCLUDE(CheckFunctionExists) + + MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list) + # This macro checks for the existence of the combination of fortran libraries diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch new file mode 100644 index 000000000000..7d72fa1a1e14 --- /dev/null +++ 
b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch @@ -0,0 +1,66 @@ +Avoid tripping on //caffe2/test/cpp/jit:test_custom_class_registrations with IS_SANDCASTLE + +Author: Ake Sandgren +Update for 2.6: Alexander Grund (TU Dresden) + +diff --git a/test/export/test_export.py b/test/export/test_export.py +index 703a0c6e918..91892503955 100755 +--- a/test/export/test_export.py ++++ b/test/export/test_export.py +@@ -11324,7 +11324,7 @@ class TestExportCustomClass(TorchTestCase): + def setUp(self): + if IS_FBCODE: + lib_file_path = "//caffe2/test/cpp/jit:test_custom_class_registrations" +- elif IS_SANDCASTLE or IS_MACOS: ++ elif False or IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") + elif IS_WINDOWS: + lib_file_path = find_library_location("torchbind_test.dll") +diff --git a/test/export/test_lift_unlift.py b/test/export/test_lift_unlift.py +index c027fc55717..17358101b8c 100644 +--- a/test/export/test_lift_unlift.py ++++ b/test/export/test_lift_unlift.py +@@ -147,7 +147,7 @@ class TestLift(TestCase): + def setUp(self): + if IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") +- elif IS_SANDCASTLE or IS_FBCODE: ++ elif False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +@@ -380,7 +380,7 @@ class ConstantAttrMapTest(TestCase): + def setUp(self): + if IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") +- elif IS_SANDCASTLE or IS_FBCODE: ++ elif False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +diff --git a/test/test_weak.py b/test/test_weak.py +index e8b6ee6f556..a6b3f0e052d 100644 +--- a/test/test_weak.py ++++ b/test/test_weak.py +@@ -593,7 +593,7 @@ class WeakKeyDictionaryScriptObjectTestCase(TestCase): + + def __init__(self, *args, **kw): + unittest.TestCase.__init__(self, *args, **kw) +- if IS_SANDCASTLE or IS_FBCODE: ++ if False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +diff --git a/torch/testing/_internal/torchbind_impls.py b/torch/testing/_internal/torchbind_impls.py +index 5566b241f56..63159276572 100644 +--- a/torch/testing/_internal/torchbind_impls.py ++++ b/torch/testing/_internal/torchbind_impls.py +@@ -113,7 +113,7 @@ def load_torchbind_test_lib(): + IS_WINDOWS, + ) + +- if IS_SANDCASTLE or IS_FBCODE: ++ if False or IS_FBCODE: + torch.ops.load_library("//caffe2/test/cpp/jit:test_custom_class_registrations") + elif IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch new file mode 100644 index 000000000000..6f1f2bd578f5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch @@ -0,0 +1,20 @@ +GCC 12 emits a false-positive warning when compiling for some architectures, e.g. Intel Sapphire Rapids. +See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370 + +Suppress this warning so that the build doesn't error. +Also disable another false-positive warning that produces a lot of output.
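+Note: append_cxx_flag_if_supported only adds a flag after a successful check_cxx_compiler_flag probe, so compilers that do not recognise these -Wno-* options should be unaffected.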
+ +Author: Alexander Grund (TU Dresden) +diff --git a/CMakeLists.txt b/CMakeLists.txt +index b74bf4536f4..bb062fa843a 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -653,6 +653,8 @@ if(MSVC) + string(APPEND CMAKE_CXX_FLAGS " /FS") + string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /FS") + endif(MSVC) ++append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS) ++append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS) + + string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch new file mode 100644 index 000000000000..fd39fbcb1e59 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch @@ -0,0 +1,127 @@ +Disable use of DataType in test when tensorboard module is not available +The corresponding import is already protected + +Updated: Alexander Grund (TU Dresden) + +diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py +index 24f2687c7dc..141d5941ff6 100644 +--- a/test/test_tensorboard.py ++++ b/test/test_tensorboard.py +@@ -870,64 +870,65 @@ class TestTensorBoardNumpy(BaseTestCase): + res = make_np({"pytorch": 1.0}) + + +-class TestTensorProtoSummary(BaseTestCase): +- @parametrize( +- "tensor_type,proto_type", +- [ +- (torch.float16, DataType.DT_HALF), +- (torch.bfloat16, DataType.DT_BFLOAT16), +- ], +- ) +- @skipIfTorchDynamo("Unsuitable test for Dynamo, behavior changes with version") +- def test_half_tensor_proto(self, tensor_type, proto_type): +- float_values = [1.0, 2.0, 3.0] +- actual_proto = ( +- tensor_proto( +- "dummy", +- torch.tensor(float_values, dtype=tensor_type), +- ) +- .value[0] +- .tensor +- ) +- self.assertSequenceEqual( +- [int_to_half(x) for x in actual_proto.half_val], +- float_values, +- ) +- self.assertTrue(actual_proto.dtype == proto_type) +- +- def test_float_tensor_proto(self): +- float_values = [1.0, 2.0, 3.0] +- actual_proto = tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor +- self.assertEqual(actual_proto.float_val, float_values) +- self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT) +- +- def test_int_tensor_proto(self): +- int_values = [1, 2, 3] +- actual_proto = ( +- tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32)) +- .value[0] +- .tensor ++if TEST_TENSORBOARD: ++ class TestTensorProtoSummary(BaseTestCase): ++ @parametrize( ++ "tensor_type,proto_type", ++ [ ++ (torch.float16, DataType.DT_HALF), ++ (torch.bfloat16, DataType.DT_BFLOAT16), ++ ], + ) +- self.assertEqual(actual_proto.int_val, int_values) +- self.assertTrue(actual_proto.dtype == DataType.DT_INT32) ++ @skipIfTorchDynamo("Unsuitable test for Dynamo, behavior changes with version") ++ def test_half_tensor_proto(self, tensor_type, proto_type): ++ float_values = [1.0, 2.0, 3.0] ++ actual_proto = ( ++ tensor_proto( ++ "dummy", ++ torch.tensor(float_values, dtype=tensor_type), ++ ) ++ .value[0] ++ .tensor ++ ) ++ self.assertSequenceEqual( ++ [int_to_half(x) for x in actual_proto.half_val], ++ float_values, ++ ) ++ self.assertTrue(actual_proto.dtype == proto_type) ++ ++ def test_float_tensor_proto(self): ++ float_values = [1.0, 2.0, 3.0] ++ actual_proto = tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor ++ self.assertEqual(actual_proto.float_val, float_values) ++ 
self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT) ++ ++ def test_int_tensor_proto(self): ++ int_values = [1, 2, 3] ++ actual_proto = ( ++ tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32)) ++ .value[0] ++ .tensor ++ ) ++ self.assertEqual(actual_proto.int_val, int_values) ++ self.assertTrue(actual_proto.dtype == DataType.DT_INT32) + +- def test_scalar_tensor_proto(self): +- scalar_value = 0.1 +- actual_proto = tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor +- self.assertAlmostEqual(actual_proto.float_val[0], scalar_value) ++ def test_scalar_tensor_proto(self): ++ scalar_value = 0.1 ++ actual_proto = tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor ++ self.assertAlmostEqual(actual_proto.float_val[0], scalar_value) + +- def test_complex_tensor_proto(self): +- real = torch.tensor([1.0, 2.0]) +- imag = torch.tensor([3.0, 4.0]) +- actual_proto = tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor +- self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0]) ++ def test_complex_tensor_proto(self): ++ real = torch.tensor([1.0, 2.0]) ++ imag = torch.tensor([3.0, 4.0]) ++ actual_proto = tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor ++ self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0]) + +- def test_empty_tensor_proto(self): +- actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor +- self.assertEqual(actual_proto.float_val, []) ++ def test_empty_tensor_proto(self): ++ actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor ++ self.assertEqual(actual_proto.float_val, []) + + +-instantiate_parametrized_tests(TestTensorProtoSummary) ++ instantiate_parametrized_tests(TestTensorProtoSummary) + + if __name__ == "__main__": + run_tests() diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch new file mode 100644 index 000000000000..e962c595d5c3 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch @@ -0,0 +1,28 @@ +Disable tests that require external downloads + +Åke Sandgren +Updated: Alexander Grund (TU Dresden) +diff --git a/test/test_hub.py b/test/test_hub.py +index 1447b3dc4a7..bf0b0d929ea 100644 +--- a/test/test_hub.py ++++ b/test/test_hub.py +@@ -25,6 +25,7 @@ TORCHHUB_EXAMPLE_RELEASE_URL = ( + + + @unittest.skipIf(IS_SANDCASTLE, "Sandcastle cannot ping external") ++@unittest.skip('EasyBuild: do not want tests to depend on downloading') + class TestHub(TestCase): + def setUp(self): + super().setUp() +diff --git a/test/test_nn.py b/test/test_nn.py +index 0af76d427e2..68f661e7a1d 100644 +--- a/test/test_nn.py ++++ b/test/test_nn.py +@@ -152,6 +152,7 @@ class TestNN(NNTestCase): + for b in m.buffers(): + self.assertFalse(b.requires_grad) + ++ @unittest.skip('EasyBuild: do not want tests to depend on downloading') + def test_module_backcompat(self): + from torch.serialization import SourceChangeWarning + path = download_file('https://download.pytorch.org/test_data/linear.pt') diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch new file mode 100644 index 000000000000..5f3a97e74ab8 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch @@ -0,0 +1,33 @@ +Fix a failure in
dynamo/test_exc.py that shows this diff in the expected output. +This was seemingly fixed manually in a merge commit in https://github.com/pytorch/pytorch/pull/143926 so there is no more specific commit. + +Author: Alexander Grund (TU Dresden) +--- a/test/dynamo/test_exc.py ++++ b/test/dynamo/test_exc.py +@@ -257,18 +257,13 @@ def fn(x, shape): + ==> (== L['shape'][2] s3) + ==> (== L['x'].size()[0] s0) + ==> (> s0 1) +- ==> (True) + + Target Expressions: + ==> (!= (+ s1 s2 s3) s0) +- ==> (<= (+ s1 s2 s3) s0) +- ==> (<= (+ s1 s2) (+ s0 (* -1 s3))) +- ==> (<= (+ s1 s2) s0) + ==> (<= 0 s1) + ==> (<= 0 s2) + ==> (<= 0 s3) + ==> (<= 2 s0) +- ==> (<= s1 (+ s0 (* -1 s2))) + ==> (== 0 L['x'].storage_offset()) + ==> (== 1 L['x'].stride()[0]) + ==> (== L['shape'][0] s1) +@@ -277,7 +272,6 @@ def fn(x, shape): + ==> (== L['x'].size()[0] s0) + ==> (> s0 0) + ==> (>= 0 s1) +- ==> (And (<= (+ s1 s2) s0) (<= (* -1 s0) (+ s1 s2))) + + Failed Source Expressions: + ==> (== (+ L['shape'][0] L['shape'][1] L['shape'][2]) L['x'].size()[0])""", diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch new file mode 100644 index 000000000000..471e3deb40ba --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch @@ -0,0 +1,127 @@ +An intended optimization in linalg.solve that no longer applies leads to failures in some tests in test_ops.py +due to small discrepancies depending on the memory layout of the input tensors. + +test_vmapvjp_linalg_tensorsolve_cpu_float32 fails with: +> Mismatched elements: 222 / 288 (77.1%) +> Greatest absolute difference: 0.13232421875 at index (0, 0, 3, 1, 1) (up to 0.0001 allowed) +> Greatest relative difference: 0.00012596177111845464 at index (0, 1, 3, 2, 0) (up to 0.0001 allowed) + +test_vmapvjpvjp_linalg_tensorsolve_cpu_float32 fails with: +> Mismatched elements: 6 / 288 (2.1%) +> Greatest absolute difference: 0.0005550384521484375 at index (0, 2, 1, 0, 2) (up to 0.0001 allowed) +> Greatest relative difference: 0.0002498962276149541 at index (0, 1, 0, 0, 2) (up to 0.0001 allowed) + +See https://github.com/pytorch/pytorch/issues/151440 + +Author: Alexander Grund (TU Dresden) + +diff --git a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp +index ec5969d32c0..4dc32570c8c 100644 +--- a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp ++++ b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp +@@ -382,14 +382,6 @@ fourOutputs solve_ex_batch_rule( + A_ = ensure_has_bdim(A_, A_bdim.has_value(), batch_size); + B_ = ensure_has_bdim(B_, B_bdim.has_value(), batch_size); + +- // NOTE [ solve_ex Batch Rule Contiguity ] +- // A determines whether or not linalg_solve takes an optimized path. We need the check on A_ to match the one run on +- // A as BatchedTensor since it might have been saved by autograd (specifically by the jvp) and the autograd behvaior +- // differs based on whether or not the optimized path was taken +- const auto batched_A_was_contiguous = A_bdim.has_value() ? 
at::select(A, *A_bdim, 0).is_contiguous() : A.is_contiguous(); +- if (batched_A_was_contiguous && !A.is_complex()) { +- A_ = A_.contiguous(); +- } + auto res = _linalg_solve_ex(A_, B_, left, check_errors); + return std::make_tuple(std::move(std::get<0>(res)), 0, std::move(std::get<1>(res)), 0, std::move(std::get<2>(res)), 0, std::move(std::get<3>(res)), 0); + } +diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp +index 02b5d76892e..82c90d6fe41 100644 +--- a/aten/src/ATen/native/BatchLinearAlgebra.cpp ++++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp +@@ -1946,15 +1946,10 @@ TORCH_IMPL_FUNC(_linalg_solve_ex_out)(const Tensor& A, + const Tensor& LU, + const Tensor& pivots, + const Tensor& info) { +- // Possible optimization: Compute the LU factorization of A^T if A is contiguous +- // Then we solve A^T X = B with adjoint=True +- // This saves a copy as A doesn't need to be copied into an F-contig matrix in lu_factor +- // This optimization makes functorch's batching rule difficult. See NOTE [ solve_ex Batch Rule Contiguity ] +- const bool use_A_T = A.is_contiguous() && !A.is_complex(); + at::linalg_lu_factor_ex_out(const_cast(LU), + const_cast(pivots), + const_cast(info), +- use_A_T ? A.mT() : A); ++ A); + if (check_errors) { + at::_linalg_check_errors(info, "torch.linalg.solve_ex", A.dim() == 2); + } +@@ -1963,7 +1958,7 @@ TORCH_IMPL_FUNC(_linalg_solve_ex_out)(const Tensor& A, + const bool vector_case = at::native::linalg_solve_is_vector_rhs(LU, B); + auto result_ = vector_case ? result.unsqueeze(-1) : result; + auto B_ = vector_case ? B.unsqueeze(-1) : B; +- at::linalg_lu_solve_out(result_, LU, pivots, B_, left, /*adjoint*/use_A_T); ++ at::linalg_lu_solve_out(result_, LU, pivots, B_, left); + } + + std::tuple linalg_solve_ex_out(const Tensor& A, +diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml +index fa77b906b1b..9493a92e933 100644 +--- a/tools/autograd/derivatives.yaml ++++ b/tools/autograd/derivatives.yaml +@@ -1576,7 +1576,7 @@ + + - name: _linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor LU, Tensor pivots, Tensor info) + A, B: linalg_solve_backward(grad, result, A, LU, pivots, left, grad_input_mask[1]) +- result: "linalg_solve_jvp(A_t, B_t, result, LU, pivots, left, A_p.is_contiguous() && !A_p.is_complex())" ++ result: "linalg_solve_jvp(A_t, B_t, result, LU, pivots, left)" + output_differentiability: [True, False, False, False] # LU is an auxiliary tensor not exposed to the user + + - name: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +diff --git a/torch/csrc/autograd/FunctionsManual.cpp b/torch/csrc/autograd/FunctionsManual.cpp +index f231373ff65..d683c37987e 100644 +--- a/torch/csrc/autograd/FunctionsManual.cpp ++++ b/torch/csrc/autograd/FunctionsManual.cpp +@@ -5823,8 +5823,7 @@ Tensor linalg_solve_jvp( + const Tensor& X, + const Tensor& LU, + const Tensor& pivots, +- const bool left, +- const bool use_A_T) { ++ const bool left) { + at::NoTF32Guard disable_tf32; + // For left=True (left=False is analogous) + // dX = A^{-1}(dB - dAX) +@@ -5847,7 +5846,7 @@ Tensor linalg_solve_jvp( + auto dB_ = vector_to_matrix(dB); + auto R_ = left ? 
dA.matmul(X_) : X_.matmul(dA); + auto dX_ = +- at::linalg_lu_solve(LU, pivots, dB_ - R_, left, /*adjoint*/ use_A_T); ++ at::linalg_lu_solve(LU, pivots, dB_ - R_, left); + return matrix_to_vector(dX_); + } + +@@ -5885,9 +5884,8 @@ std::tuple linalg_solve_backward( + if (at::GradMode::is_enabled()) { + gB_ = at::linalg_solve(A.mH(), vector_to_matrix(gX), left); + } else { +- const auto use_A_T = A.is_contiguous() && !A.is_complex(); + gB_ = at::linalg_lu_solve( +- LU, pivots, vector_to_matrix(gX), left, /*adjoint*/ !use_A_T); ++ LU, pivots, vector_to_matrix(gX), left, /*adjoint*/ true); + } + + Tensor gA_; +diff --git a/torch/csrc/autograd/FunctionsManual.h b/torch/csrc/autograd/FunctionsManual.h +index 4f9fe796947..124ec87509f 100644 +--- a/torch/csrc/autograd/FunctionsManual.h ++++ b/torch/csrc/autograd/FunctionsManual.h +@@ -866,8 +866,7 @@ Tensor linalg_solve_jvp( + const Tensor& X, + const Tensor& LU, + const Tensor& pivots, +- const bool left, +- const bool use_A_T); ++ const bool left); + Tensor lu_unpack_backward( + const Tensor& L_grad, + const Tensor& U_grad, diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch new file mode 100644 index 000000000000..65400fa56732 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch @@ -0,0 +1,37 @@ +If there are no GPUs there would be a WORLD_SIZE=0 which doesn't work. +Use a positive number for the NCCL/GLOO tests in that case. + +See https://github.com/pytorch/pytorch/pull/150764 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/run_test.py b/test/run_test.py +index a508d8db4d2..e7bbe6ea086 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -610,21 +610,22 @@ DISTRIBUTED_TESTS_CONFIG = {} + + + if dist.is_available(): ++ num_gpus = torch.cuda.device_count() + DISTRIBUTED_TESTS_CONFIG["test"] = {"WORLD_SIZE": "1"} + if not TEST_WITH_ROCM and dist.is_mpi_available(): + DISTRIBUTED_TESTS_CONFIG["mpi"] = { + "WORLD_SIZE": "3", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-mpi", + } +- if dist.is_nccl_available(): ++ if dist.is_nccl_available() and num_gpus > 0: + DISTRIBUTED_TESTS_CONFIG["nccl"] = { +- "WORLD_SIZE": f"{torch.cuda.device_count()}", ++ "WORLD_SIZE": f"{num_gpus}", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-nccl", + } +- if dist.is_gloo_available(): ++ if dist.is_gloo_available() and num_gpus > 0: + DISTRIBUTED_TESTS_CONFIG["gloo"] = { + # TODO: retire testing gloo with CUDA +- "WORLD_SIZE": f"{torch.cuda.device_count()}", ++ "WORLD_SIZE": f"{num_gpus}", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-gloo", + } + # Test with UCC backend is deprecated. 
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch new file mode 100644 index 000000000000..fc488f80f79b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch @@ -0,0 +1,34 @@ +This fixes a failure in test_trigger_bisect_on_error +> torch/fx/experimental/validator.py", line 838 +> assert left in exception and isinstance(exception[left], ValidationException) +> ^^^^^^^^^^^^^^^^^ +> AssertionError: + +From a86fa779ce3482324a0d1fbb12d87a95a981f0a3 Mon Sep 17 00:00:00 2001 +From: Ryan Guo +Date: Wed, 22 Jan 2025 14:23:11 -0800 +Subject: [PATCH] [BE] Fix edge case in translation validation bisector + (#145414) + +This patch fixes a small bug for the binary-search algorithm in +translation validation bisector. Fixes #131303. + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145414 + +index 61a51b977311d..17a814b233c63 100644 +--- a/torch/fx/experimental/validator.py ++++ b/torch/fx/experimental/validator.py +@@ -819,7 +819,13 @@ def check_node_fails(node: torch.fx.Node) -> Optional[ValidationException]: + ] + + # Preparing the indices for binary search. ++ # The overall invariants are ++ # - for all i < left, assert_node[i] doesn't fail ++ # - for all i >= right, assert_node[i] fails ++ # - `right in exception` always holds ++ # - `left <= right` always holds + left, mid, right = 0, 0, len(assert_nodes) - 1 ++ exception[right] = check_node_fails(assert_nodes[right]) + + while left < right: + mid = (left + right) // 2 diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch new file mode 100644 index 000000000000..ebe291d2f0bc --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch @@ -0,0 +1,22 @@ +Fix random failures of test_aot_export_with_torch_cond +> - cond = torch.ops.higher_order.cond(gt, true_graph_0, false_graph_0, [arg0_1]); gt = true_graph_0 = false_graph_0 = arg0_1 = None +> + cond = torch.ops.higher_order.cond(gt, true_graph_0, false_graph_0, [arg0_1, 3, 4]); gt = true_graph_0 = false_graph_0 = arg0_1 = None +> ? 
++++++ + +Trivial backport of https://github.com/pytorch/pytorch/pull/145330 + +Author: Alexander Grund (TU Dresden) + +--- a/test/functorch/test_aotdispatch.py ++++ b/test/functorch/test_aotdispatch.py +@@ -3973,6 +3972,10 @@ def forward(self, *args): + + + class TestAOTExport(AOTTestCase): ++ def setUp(self): ++ super().setUp() ++ torch._dynamo.reset() ++ + def test_aot_export_ban_dropout_mut_pre_dispatch(self): + def fn(p, x): + y = torch.ops.aten.dropout.default(x, 0.1, train=False) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch new file mode 100644 index 000000000000..19423c7e84d6 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch @@ -0,0 +1,25 @@ +test_triton_extension_backend.py shows an error: +> torch._dynamo.exc.BackendCompilerFailed: backend='inductor' raised: +> LoweringException: NotImplementedError: +> target: aten.min.default + +This is due to changes in other parts of the code and this method should be removed. +See also https://github.com/pytorch/pytorch/pull/144399 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/extension_backends/triton/device_interface.py b/test/inductor/extension_backends/triton/device_interface.py +index 9ca96e71a7d..14b3ca5436e 100644 +--- a/test/inductor/extension_backends/triton/device_interface.py ++++ b/test/inductor/extension_backends/triton/device_interface.py +@@ -108,10 +108,6 @@ class DeviceInterface(device_interface.DeviceInterface): + def synchronize(device) -> None: + pass + +- @staticmethod +- def get_device_properties(device) -> DeviceProperties: +- raise NotImplementedError +- + # Can be mock patched by @patch decorator. + @staticmethod + def is_available() -> bool: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch new file mode 100644 index 000000000000..84a711b25574 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch @@ -0,0 +1,15 @@ +Using "" may cause a "connection refused" error in some environments. +Using the localhost IP is more reliable. +diff --git a/test/distributed/elastic/test_control_plane.py b/test/distributed/elastic/test_control_plane.py +index ede4e352b04..c34bd4b1919 100644 +--- a/test/distributed/elastic/test_control_plane.py ++++ b/test/distributed/elastic/test_control_plane.py +@@ -157,7 +157,7 @@ class WorkerServerTest(TestCase): + + from torch._C._distributed_c10d import _WorkerServer + +- server = _WorkerServer("", 1234) ++ server = _WorkerServer("127.0.0.1", 1234) + out = requests.get("http://localhost:1234/handler/") + self.assertEqual(out.status_code, 200) + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch new file mode 100644 index 000000000000..2e6fd803a3cb --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch @@ -0,0 +1,125 @@ +The decorators are implemented to run only when the test function itself is called, +which is after the test `setUp` method has already spawned subprocesses; those +subprocesses may use NCCL to sync and fail when there are not enough GPUs available. +So replace the custom code by calls to the `unittest` skip decorators, as sketched below.
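+For illustration, this is the pattern being removed (a minimal sketch mirroring the
+code deleted below):
+
+    def skip_if_lt_x_gpu(x):
+        def decorator(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                # Only runs once the test body is entered, i.e. after setUp()
+                # has already spawned the worker processes.
+                if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                    return func(*args, **kwargs)
+                sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+            return wrapper
+        return decorator
+
+unittest's skip decorators instead mark the test as skipped up front, so the runner
+never calls setUp() for it.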
+See https://github.com/pytorch/pytorch/pull/109491 + +Author: Alexander Grund (TU Dresden) +diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py +index d34b1ffdb0a..8f9628f209b 100644 +--- a/torch/testing/_internal/common_distributed.py ++++ b/torch/testing/_internal/common_distributed.py +@@ -155,17 +155,7 @@ def skip_if_odd_worldsize(func): + + + def require_n_gpus_for_nccl_backend(n, backend): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if backend == "nccl" and torch.cuda.device_count() < n: +- sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code) +- else: +- return func(*args, **kwargs) +- +- return wrapper +- +- return decorator ++ return unittest.skipUnless(at_least_x_gpu(n)) if backend == "nccl" else unittest.skipIf(False, None) + + + def import_transformers_or_skip(): +@@ -192,34 +182,9 @@ def at_least_x_gpu(x): + + + def skip_if_lt_x_gpu(x): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if torch.cuda.is_available() and torch.cuda.device_count() >= x: +- return func(*args, **kwargs) +- if TEST_HPU and torch.hpu.device_count() >= x: +- return func(*args, **kwargs) +- sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) +- +- return wrapper +- +- return decorator +- +- +-# This decorator helps avoiding initializing cuda while testing other backends +-def nccl_skip_if_lt_x_gpu(backend, x): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if backend != "nccl": +- return func(*args, **kwargs) +- if torch.cuda.is_available() and torch.cuda.device_count() >= x: +- return func(*args, **kwargs) +- sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) +- +- return wrapper +- +- return decorator ++ return unittest.skipUnless(torch.cuda.device_count() >= x or ( ++ TEST_HPU and torch.hpu.device_count() >= x), ++ TEST_SKIPS[f"multi-gpu-{x}"].message) + + + def verify_ddp_error_logged(model_DDP, err_substr): +diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py +index a4d6d53b975..0da1d9baddf 100644 +--- a/torch/testing/_internal/distributed/distributed_test.py ++++ b/torch/testing/_internal/distributed/distributed_test.py +@@ -66,7 +66,6 @@ from torch.testing._internal.common_distributed import ( + skip_if_small_worldsize, + skip_if_odd_worldsize, + skip_if_lt_x_gpu, +- nccl_skip_if_lt_x_gpu, + skip_if_no_gpu, + require_n_gpus_for_nccl_backend, + requires_nccl_version, +@@ -5299,7 +5298,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync(self): + """ + Runs _test_accumulate_gradients_no_sync using default inputs +@@ -5310,7 +5309,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync_grad_is_view(self): + """ + Runs _test_accumulate_gradients_no_sync using default inputs +@@ -5321,7 +5320,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def 
test_accumulate_gradients_no_sync_allreduce_hook(self): + """ + Runs multiple iterations on _test_accumulate_gradients_no_sync +@@ -5349,7 +5348,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync_allreduce_with_then_hook(self): + """ + Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce +@@ -5383,7 +5382,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_get_future(self): + def mult(fut): + return [t * 3 for t in fut.wait()] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch new file mode 100644 index 000000000000..4618da11c9b4 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch @@ -0,0 +1,24 @@ +There is an assertion failure with sympy 1.13.3: +> File "torch/fx/experimental/symbolic_shapes.py", line 6403, in _evaluate_expr +> assert orig_expr == hint, f"{orig_expr} != {hint}" +> ^^^^^^^^^^^^^^^^^ +> AssertionError: 0 != 0.0 + +Backport of https://github.com/pytorch/pytorch/pull/147197 + +Author: Alexander Grund (TU Dresden) + +--- a/torch/fx/experimental/symbolic_shapes.py ++++ b/torch/fx/experimental/symbolic_shapes.py +@@ -6400,7 +6400,10 @@ class ShapeEnv: + if orig_expr.is_number: + self.log.debug("eval %s [trivial]", orig_expr) + if hint is not None: +- assert orig_expr == hint, f"{orig_expr} != {hint}" ++ if isinstance(hint, bool): ++ assert orig_expr == hint, f"{orig_expr} != {hint}" ++ else: ++ assert sympy.Eq(orig_expr, hint), f"{orig_expr} != {hint}" + return orig_expr + + expr = orig_expr diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch new file mode 100644 index 000000000000..d508b09164bf --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch @@ -0,0 +1,17 @@ +Fix failure in TestCompiledAutograd.test_autograd_cpp_node_saved_float +> Expected 3 but got 4. 
+ +Taken from https://github.com/pytorch/pytorch/pull/143247 +--- a/test/inductor/test_compiled_autograd.py ++++ b/test/inductor/test_compiled_autograd.py +@@ -2424,7 +2424,9 @@ def fn(): + yield x.grad + + # compiled autograd and dynamo both support symfloat, but not backend +- self.check_output_and_recompiles(fn, [1, 3]) ++ self.check_output_and_recompiles(fn, [1, 4]) ++ # 1 restart analysis due to specialize_float=False ++ self.assertEqual(counters["stats"]["unique_graphs"], 3) + + @scoped_load_inline + def test_autograd_cpp_node_data_dependent(self, load_inline): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch new file mode 100644 index 000000000000..94591cd56888 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch @@ -0,0 +1,36 @@ +TestSelectAlgorithmCPU.test_linear_with_embedding fails when the CPU does not support BF16: +> torch._inductor.exc.InductorError: LoweringException: RuntimeError: self and mat2 must have the same dtype, but got Float and BFloat16 +See https://github.com/pytorch/pytorch/issues/147104 + +Convert the embedding layer to avoid it using "Float" and adapt the check for this change. + +Author: Alexander Grund (TU Dresden) +--- a/test/inductor/test_cpu_select_algorithm.py ++++ b/test/inductor/test_cpu_select_algorithm.py +@@ -932,6 +932,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + def test_linear_with_embedding( + self, batch_size, in_features, out_features, bias, dtype + ): ++ has_bf16 = torch.ops.mkldnn._is_mkldnn_bf16_supported() + class M(torch.nn.Module): + def __init__(self, bias): + super().__init__() +@@ -939,6 +940,9 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + dtype=dtype + ) + self.emb = torch.nn.Embedding(64, out_features) ++ if not has_bf16: ++ self.emb = self.emb.to(dtype=dtype) ++ + + def forward(self, idx, x): + return self.emb(idx) + self.linear(x) +@@ -949,7 +953,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + with verify(dtype) as (atol, rtol): + self.common(mod, (idx, x), atol=atol, rtol=rtol) + self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1) +- self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 1) ++ self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 1 if has_bf16 else 0) + + @inductor_config.patch({"freezing": True}) + @patches diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch new file mode 100644 index 000000000000..a6f086a36daf --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch @@ -0,0 +1,16 @@ +This test fails when FlexiBLAS is used instead of MKL. +Adjust the expected count. 
+See https://github.com/pytorch/pytorch/pull/151548 + +Author: Alexander Grund (TU Dresden) +--- a/test/inductor/test_cpu_select_algorithm.py ++++ b/test/inductor/test_cpu_select_algorithm.py +@@ -1301,7 +1301,7 @@ def forward(self, arg152_1): + rtol=rtol, + ) + self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 2) +- self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 2) ++ self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 2 if TEST_MKL else 1) + + @inductor_config.patch({"freezing": True}) + @patches diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch new file mode 100644 index 000000000000..18f34a0402fd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch @@ -0,0 +1,32 @@ +From bf4f8919df8ee88e356b407bb84ed818ebfb407b Mon Sep 17 00:00:00 2001 +From: Howard Huang +Date: Wed, 22 Jan 2025 10:12:15 -0800 +Subject: [PATCH] Fix test_modules_can_be_imported (#145387) + +`test_modules_can_be_imported` test is currently failing due to a few missing private modules and this PR gets it working before I start to clean up the public allow list +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145387 +Approved by: https://github.com/albanD +--- + test/test_public_bindings.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/test/test_public_bindings.py b/test/test_public_bindings.py +index c4e819704f17b..61065fcd629d2 100644 +--- a/test/test_public_bindings.py ++++ b/test/test_public_bindings.py +@@ -308,6 +308,7 @@ def onerror(modname): + "torch.onnx._internal.exporter._reporting", + "torch.onnx._internal.exporter._schemas", + "torch.onnx._internal.exporter._tensors", ++ "torch.onnx._internal.exporter._torchlib.ops", + "torch.onnx._internal.exporter._verification", + "torch.onnx._internal.fx._pass", + "torch.onnx._internal.fx.analysis", +@@ -377,6 +378,7 @@ def onerror(modname): + "torch.distributed._spmd.experimental_ops", + "torch.distributed._spmd.parallel_mode", + "torch.distributed._tensor", ++ "torch.distributed._tools.sac_ilp", + "torch.distributed.algorithms._checkpoint.checkpoint_wrapper", + "torch.distributed.algorithms._optimizer_overlap", + "torch.distributed.rpc._testing.faulty_agent_backend_registry", diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch new file mode 100644 index 000000000000..345359eef555 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch @@ -0,0 +1,78 @@ +Fix from https://github.com/pytorch/pytorch/pull/145315 for test_unbacked_bindings_for_divisible_u_symint +often failing with +> Tried to register an operator (mylib::foo(Tensor a, Tensor b) -> Tensor) with the same name and overload name multiple times. + +Trivial backport +Author: Alexander Grund (TU Dresden) +--- a/test/export/test_export.py ++++ b/test/export/test_export.py +@@ -3049,43 +3049,35 @@ def forward(self, x): + @testing.expectedFailureCppSerDes # no unbacked bindings after deserialization? 
+ @testing.expectedFailureSerDerNonStrict + def test_unbacked_bindings_for_divisible_u_symint(self): +- with torch.library._scoped_library("mylib", "FRAGMENT") as lib: +- torch.library.define( +- "mylib::foo", +- "(Tensor a, Tensor b) -> (Tensor)", +- tags=torch.Tag.pt2_compliant_tag, +- lib=lib, +- ) +- +- class M(torch.nn.Module): +- def forward(self, a, b): +- return torch.ops.mylib.foo(a, b) ++ class M(torch.nn.Module): ++ def forward(self, a, b): ++ return torch.ops.mylib.foo_unbacked(a, b) + +- @torch.library.impl("mylib::foo", "cpu", lib=lib) +- def foo_impl(a, b): +- return a[b.item()] ++ @torch.library.custom_op("mylib::foo_unbacked", mutates_args={}) ++ def foo_unbacked(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: ++ return a[b.item()] + +- @torch.library.register_fake("mylib::foo", lib=lib) +- def foo_fake_impl(a, b): +- ctx = torch.library.get_ctx() +- u = ctx.new_dynamic_size(min=0, max=len(a) // 10) * 10 +- return torch.empty(u, a.shape[1], dtype=a.dtype) ++ @foo_unbacked.register_fake ++ def foo_unbacked_fake_impl(a, b): ++ ctx = torch.library.get_ctx() ++ u = ctx.new_dynamic_size(min=0, max=len(a) // 10) * 10 ++ return torch.empty(u, a.shape[1], dtype=a.dtype) + +- ep = export( +- M(), +- (torch.randn(100, 4), torch.tensor(10)), +- ) +- foo = [node for node in ep.graph.nodes if node.name == "foo"][0] +- unbacked_bindings = foo.meta["unbacked_bindings"] +- self.assertEqual(len(unbacked_bindings), 1) # check binding is {u: path} +- u = next(iter(unbacked_bindings.keys())) +- self.assertEqual( +- type(u).__name__, "Symbol" +- ) # check binding is symbol, not expr +- path = unbacked_bindings[u] +- self.assertEqual(len(path), 3) # check path is [size, 0, DivideByKey(10)] +- self.assertEqual(type(path[2]).__name__, "DivideByKey") +- self.assertEqual(path[2].divisor, 10) ++ ep = export( ++ M(), ++ (torch.randn(100, 4), torch.tensor(10)), ++ ) ++ foo = [node for node in ep.graph.nodes if node.name == "foo_unbacked"][0] ++ unbacked_bindings = foo.meta["unbacked_bindings"] ++ self.assertEqual(len(unbacked_bindings), 1) # check binding is {u: path} ++ u = next(iter(unbacked_bindings.keys())) ++ self.assertEqual( ++ type(u).__name__, "Symbol" ++ ) # check binding is symbol, not expr ++ path = unbacked_bindings[u] ++ self.assertEqual(len(path), 3) # check path is [size, 0, DivideByKey(10)] ++ self.assertEqual(type(path[2]).__name__, "DivideByKey") ++ self.assertEqual(path[2].divisor, 10) + + def test_torch_check_eq_commutativity(self): + class M1(torch.nn.Module): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch new file mode 100644 index 000000000000..bad316bd3ca6 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch @@ -0,0 +1,173 @@ +The VSX vector shift operators don't handle out-of-bounds values correctly. 
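+For example (illustrative, using the clamping semantics the patch introduces below): for int32 lanes, 1 << 35 should yield 0 and -64 >> 35 should yield -1 (arithmetic sign fill), but the raw vec_sl/vec_sra instructions use only the low-order bits of the shift amount and would effectively shift by 3, giving 8 and -8 instead.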
+See https://github.com/pytorch/pytorch/issues/109777 +Taken from https://github.com/pytorch/pytorch/pull/150524 + +Author: Alexander Grund (TU Dresden) + +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h +index ae146dae4d42a5..b9e8ac36391c4d 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h +@@ -334,20 +334,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int16_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint16 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint16 shift_vec1 = reinterpret_cast(b.vec1()); +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint16 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint16 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)}; +-} +- + template <> + Vectorized inline maximum( + const Vectorized& a, +@@ -362,6 +348,8 @@ Vectorized inline minimum( + return a.minimum(b); + } + ++DEFINE_SHIFT_FUNCS(int16_t) ++ + template <> + Vectorized C10_ALWAYS_INLINE operator+(const Vectorized& a, const Vectorized& b) { + return Vectorized{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())}; +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h +index 98401381c6e822..acb7bd0a5d6690 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h +@@ -265,20 +265,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int32_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint32 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint32 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint32 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint32 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)}; +-} +- + template <> + Vectorized inline maximum( + const Vectorized& a, +@@ -293,6 +279,8 @@ Vectorized inline minimum( + return a.minimum(b); + } + ++DEFINE_SHIFT_FUNCS(int32_t) ++ + template <> + Vectorized C10_ALWAYS_INLINE operator+(const Vectorized& a, const Vectorized& b) { + return Vectorized{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())}; +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h +index f8217930fa4989..1744a7cc465ad3 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h +@@ -218,20 +218,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int64_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint64 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint64 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint64 shift_vec0 = reinterpret_cast(b.vec0()); 
+-  vuint64 shift_vec1 = reinterpret_cast<vuint64>(b.vec1()) ;
+-  return Vectorized<int64_t>{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)};
+-}
+-
+ template <>
+ Vectorized<int64_t> inline maximum(
+     const Vectorized<int64_t>& a,
+@@ -246,6 +232,8 @@ Vectorized<int64_t> inline minimum(
+   return a.minimum(b);
+ }
+
++DEFINE_SHIFT_FUNCS(int64_t)
++
+ template <>
+ Vectorized<int64_t> C10_ALWAYS_INLINE operator+(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
+   return Vectorized<int64_t>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
+diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+index 1dc742f3cbb1c2..12e3fc446a600f 100644
+--- a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+@@ -2,6 +2,7 @@
+ #include <cstdint>
+ #include <c10/macros/Macros.h>
+ #include <ATen/cpu/vec/intrinsics.h>
++#include <climits>
+
+ #if defined(__clang__)
+ typedef __vector __bool char vbool8;
+@@ -35,6 +36,11 @@ using vfloat32 = __attribute__((altivec(vector__))) float;
+ using vfloat64 = __attribute__((altivec(vector__))) double;
+ #endif
+
++inline auto make_vuint(vint8 v){ return reinterpret_cast<vuint8>(v); }
++inline auto make_vuint(vint16 v){ return reinterpret_cast<vuint16>(v); }
++inline auto make_vuint(vint32 v){ return reinterpret_cast<vuint32>(v); }
++inline auto make_vuint(vint64 v){ return reinterpret_cast<vuint64>(v); }
++
+ #if !defined(vec_float)
+ C10_ALWAYS_INLINE vfloat32 vec_float(const vint32& vec_in) {
+   vfloat32 vec_out;
+@@ -469,6 +475,40 @@ const vfloat64 vd_imag_half = vfloat64{0.0, 0.5};
+ const vfloat64 vd_sqrt2_2 = vfloat64{0.70710678118654757, 0.70710678118654757};
+ const vfloat64 vd_pi_2 = vfloat64{M_PI / 2.0, 0.0};
+
++template <typename T>
++Vectorized<T> VsxShiftRightArith(const Vectorized<T>& a, const Vectorized<T>& b) {
++  const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT - std::is_signed_v<T>);
++  const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
++  const auto shift = Vectorized<T>::blendv(b, max_shift, mask);
++  return Vectorized<T>{
++      vec_sra(a.vec0(), make_vuint(shift.vec0())),
++      vec_sra(a.vec1(), make_vuint(shift.vec1()))};
++}
++
++template <typename T>
++Vectorized<T> VsxShiftLeftArith(const Vectorized<T>& a, const Vectorized<T>& b) {
++  const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT);
++  const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
++  Vectorized<T> ret(
++      vec_sl(a.vec0(), make_vuint(b.vec0())),
++      vec_sl(a.vec1(), make_vuint(b.vec1())));
++  return Vectorized<T>::blendv(ret, Vectorized<T>(0), mask);
++}
++
++#define DEFINE_SHIFT_FUNCS(operand_type)                   \
++  template <>                                              \
++  Vectorized<operand_type> C10_ALWAYS_INLINE operator>>(   \
++      const Vectorized<operand_type>& a,                   \
++      const Vectorized<operand_type>& b) {                 \
++    return VsxShiftRightArith(a, b);                       \
++  }                                                        \
++  template <>                                              \
++  Vectorized<operand_type> C10_ALWAYS_INLINE operator<<(   \
++      const Vectorized<operand_type>& a,                   \
++      const Vectorized<operand_type>& b) {                 \
++    return VsxShiftLeftArith(a, b);                        \
++  }                                                        \
++
+ } // namespace
+ } // namespace vec
+ } // namespace at
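The clamping above is the actual fix: previously, shift amounts that were negative or at least the bit width went straight into vec_sl/vec_sr, which is undefined. A minimal sketch of the semantics the patched VSX path is expected to match (the scalar reference behaviour; results assume a PyTorch build with this patch applied):

```python
import torch

# int16 has 16 bits, so shift amounts >= 16 or negative are out of range.
x = torch.tensor([16, -16], dtype=torch.int16)
print(x << torch.tensor(20, dtype=torch.int16))  # tensor([0, 0]):  left shift out of range -> 0
print(x >> torch.tensor(20, dtype=torch.int16))  # tensor([0, -1]): right shift clamps to 15, sign-fills
print(x << torch.tensor(-1, dtype=torch.int16))  # tensor([0, 0]):  negative shift -> 0
```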
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch
new file mode 100644
index 000000000000..c8f7490acb2a
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch
@@ -0,0 +1,36 @@
+Backport of https://github.com/google/XNNPACK/commit/5f23827e66cca435fa400b6e221892ac95af0079 to fix
+> unary-elementwise.cc:125:14: error: invalid 'static_cast' from type 'xnn_bfloat16' to type '_Float16'
+
+Author: Alexander Grund (TU Dresden)
+diff -ur a/third_party/XNNPACK/src/reference/unary-elementwise.cc b/third_party/XNNPACK/src/reference/unary-elementwise.cc
+--- a/third_party/XNNPACK/src/reference/unary-elementwise.cc	2025-04-02 11:29:03.536599185 +0200
++++ b/third_party/XNNPACK/src/reference/unary-elementwise.cc	2025-04-02 11:31:46.182485083 +0200
+@@ -127,6 +127,16 @@
+   }
+ };
+
++#ifdef XNN_HAVE_FLOAT16
++template <>
++struct ConvertOp<xnn_bfloat16, _Float16> {
++  explicit ConvertOp(const xnn_unary_uparams*) {}
++  _Float16 operator()(xnn_bfloat16 x) const {
++    return static_cast<_Float16>(static_cast<float>(x));
++  }
++};
++#endif
++
+ template <typename TIn, typename TOut>
+ const xnn_unary_elementwise_config* get_convert_config(
+     std::true_type /*input_quantized*/, std::true_type /*output_quantized*/) {
+diff -ur a/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h b/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h
+--- a/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h	2025-04-02 11:29:03.497600412 +0200
++++ b/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h	2025-04-02 11:30:22.054130343 +0200
+@@ -70,7 +70,7 @@
+     v_low = vget_high_s16(v);
+   }
+   if (num_elements & 2) {
+-    vst1_lane_s32((void*) output, vreinterpret_s32_s16(v_low), 0);
++    vst1_lane_s32((int32_t*) output, vreinterpret_s32_s16(v_low), 0);
+     output += 2;
+     v_low = vext_s16(v_low, v_low, 2);
+   }
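The backported specialization routes the cast through float because GCC rejects a direct cast from the xnn_bfloat16 wrapper to _Float16; the detour is lossless in its first step, since every bfloat16 value is exactly representable in float32. The same equivalence can be checked at the Python level (illustration only, not part of the patch):

```python
import torch

x = torch.tensor([0.1, 1.5, -2.0], dtype=torch.bfloat16)
# bfloat16 -> float32 is exact, so converting via float32 matches a
# direct bfloat16 -> float16 conversion.
assert torch.equal(x.to(torch.float16), x.to(torch.float32).to(torch.float16))
```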
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch
new file mode 100644
index 000000000000..aa3113072052
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch
@@ -0,0 +1,36 @@
+test_aot_autograd_exhaustive_matmul_cpu_float32 and test_aot_autograd_exhaustive___rmatmul___cpu_float32
+fail when using OpenBLAS instead of MKL:
+
+> Mismatched elements: 1 / 10 (10.0%)
+> Greatest absolute difference: 5.91278076171875e-05 at index (7,) (up to 1e-05 allowed)
+> Greatest relative difference: 3.468156592134619e-06 at index (7,) (up to 1.3e-06 allowed)
+
+Relax the tolerances to allow them to pass.
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py
+index 6213f8f0817..b7748ad8707 100644
+--- a/test/functorch/test_aotdispatch.py
++++ b/test/functorch/test_aotdispatch.py
+@@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
+     skipIfRocm,
+     skipIfTorchDynamo,
+     TestCase,
++    TEST_MKL,
+     xfail_inherited_tests,
+     xfailIfS390X,
+     xfailIfTorchDynamo,
+@@ -6434,6 +6435,12 @@ aot_autograd_failures = {
+     decorate("nn.functional.conv2d", decorator=unittest.skipIf(IS_ARM64, "flaky")),
+ }
+
++if not TEST_MKL:
++    aot_autograd_failures.update({
++        decorate("matmul", decorator=toleranceOverride({torch.float32: tol(atol=6e-05, rtol=4e-06)})),
++        decorate("__rmatmul__", decorator=toleranceOverride({torch.float32: tol(atol=6e-05, rtol=4e-06)})),
++    })
++
+ symbolic_aot_autograd_failures = {
+     xfail("combinations", ""),  # aten.masked_select.default
+     xfail(
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch
new file mode 100644
index 000000000000..e1018a5ce66f
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch
@@ -0,0 +1,23 @@
+Avoid a test failure in test_quick_baddbmm_cpu_complex64 in test_decomp.py
+on e.g. AMD EPYC (znver2) or Intel Sapphire Rapids:
+
+> AssertionError: Tensor-likes are not close!
+> Greatest absolute difference: 3.43852152582258e-05 at index (1, 2, 1) (up to 1e-05 allowed)
+> Greatest relative difference: 3.6034286949870875e-06 at index (1, 2, 1) (up to 1.3e-06 allowed)
+
+The failure doesn't happen with e.g. `-march=znver1` and is small enough to ignore.
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/torch/testing/_internal/common_methods_invocations.py
++++ b/torch/testing/_internal/common_methods_invocations.py
+@@ -12259,6 +12259,9 @@ op_db: List[OpInfo] = [
+         DecorateInfo(
+             toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
+             'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
++        DecorateInfo(
++            toleranceOverride({torch.complex64: tol(atol=4e-05, rtol=4e-06)}),
++            'TestDecomp', 'test_quick'),
+         DecorateInfo(
+             toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
+             'TestMathBits', 'test_conj_view', device_type='cuda'),
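Both tolerance patches above (and the next one) follow the same pattern: the observed deviation (roughly 3e-05 to 6e-05 absolute) slightly exceeds the default float32/complex64 tolerances of atol=1e-05 and rtol=1.3e-06, so a relaxed per-test override is registered. The effect, illustrated with the public torch.testing API:

```python
import torch

a = torch.tensor([1.0, 2.5])
b = a + torch.tensor([0.0, 5.9e-05])  # deviation like the one quoted above

try:
    torch.testing.assert_close(a, b)  # default float32: atol=1e-05, rtol=1.3e-06
except AssertionError as err:
    print(err)                        # "Tensor-likes are not close!"

# Passes with the relaxed tolerances the patch installs:
torch.testing.assert_close(a, b, atol=6e-05, rtol=4e-06)
```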
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch
new file mode 100644
index 000000000000..0529e9d78ad2
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch
@@ -0,0 +1,25 @@
+TestOperatorsCPU.test_vmap_autograd_grad_nn_functional_conv2d_cpu_float32 fails with:
+> AssertionError: Tensor-likes are not close!
+>
+> Mismatched elements: 2 / 144 (1.4%)
+> Greatest absolute difference: 1.1444091796875e-05 at index (0, 4, 0, 0, 2) (up to 1e-05 allowed)
+> Greatest relative difference: 2.064850013994146e-05 at index (0, 4, 0, 0, 2) (up to 1.3e-06 allowed)
+>
+> The failure occurred for item [1]
+See https://github.com/pytorch/pytorch/issues/151113
+Increase the tolerance to match the value already used for CUDA.
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py
+index a4269ff84d5..f46189909e5 100644
+--- a/test/functorch/test_ops.py
++++ b/test/functorch/test_ops.py
+@@ -2414,7 +2414,6 @@ class TestOperators(TestCase):
+         tol1(
+             "nn.functional.conv2d",
+             {torch.float32: tol(atol=5e-05, rtol=5e-05)},
+-            device_type="cuda",
+         ),
+         tol1("svd_lowrank", {torch.float32: tol(atol=5e-05, rtol=5e-05)}),
+         tol1("pca_lowrank", {torch.float32: tol(atol=5e-05, rtol=5e-05)}),
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch
new file mode 100644
index 000000000000..acbfbbedf26b
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch
@@ -0,0 +1,51 @@
+Remove TestExport.test_slice_with_floordiv, which does not trigger the required runtime assertion
+
+Ported from https://github.com/pytorch/pytorch/pull/145292
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/test/export/test_export.py
++++ b/test/export/test_export.py
+@@ -9947,42 +9947,6 @@ def forward(self, x, y):
+             ep.graph_module.code
+         )
+
+-    @testing.expectedFailureCppSerDes
+-    @testing.expectedFailureLegacyExportNonStrict
+-    @testing.expectedFailureLegacyExportStrict
+-    def test_slice_with_floordiv(self):
+-        # slice operation emits runtime assert s0//2 <= s1
+-        class M1(torch.nn.Module):
+-            def forward(self, x, y):
+-                d = x.size(0) // 2
+-                return y[d:]
+-
+-        class M(torch.nn.Module):
+-            def __init__(self) -> None:
+-                super().__init__()
+-                self.m1 = M1()
+-
+-            def forward(self, x, y):
+-                d = x.size(0) // 2
+-                m1_res = self.m1(x, y)
+-                return y[d:] + m1_res
+-
+-        inputs = (torch.ones(10), torch.ones(10))
+-        d0 = torch.export.Dim("d0", max=2048)
+-        d1 = torch.export.Dim("d1", max=2048)
+-        ep = export(
+-            M(),
+-            inputs,
+-            dynamic_shapes=((d0,), (d1,)),
+-        )
+-        ep.module()(torch.ones(8), torch.ones(4))
+-        ep.module()(torch.ones(8), torch.ones(5))
+-        with self.assertRaisesRegex(
+-            RuntimeError,
+-            r"Runtime assertion failed for expression \(s0//2\) \<\= s1",
+-        ):
+-            ep.module()(torch.ones(10), torch.ones(4))
+-
+     def test_split_const_gm_with_lifted_constants(self):
+         class Model(torch.nn.Module):
+             def __init__(self) -> None:
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch
new file mode 100644
index 000000000000..b8f6222dcc42
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch
@@ -0,0 +1,26 @@
+The workaround for over/underflow isn't implemented for PPC yet.
+So skip the test.
+See https://github.com/pytorch/pytorch/issues/109870
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/test_binary_ufuncs.py b/test/test_binary_ufuncs.py
+index ee9fb490356..ba18b28adeb 100644
+--- a/test/test_binary_ufuncs.py
++++ b/test/test_binary_ufuncs.py
+@@ -66,6 +66,7 @@ from torch.testing._internal.common_utils import (
+     TestCase,
+     torch_to_numpy_dtype_dict,
+     xfailIfTorchDynamo,
++    IS_PPC,
+ )
+
+
+@@ -1115,6 +1116,7 @@ class TestBinaryUfuncs(TestCase):
+     )
+
+     @dtypes(*complex_types())
++    @skipIf(IS_PPC, "Vectorized div fails on PPC: #109870")
+     def test_complex_div_underflow_overflow(self, device, dtype):
+         # test to make sure the complex division does not produce underflow or overflow
+         # in the intermediate of its calculations
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch
new file mode 100644
index 000000000000..a5369ff3398a
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch
@@ -0,0 +1,14 @@
+The test checks for a fusion that relies on MKL being available, so it fails without MKL:
+> AssertionError: 'mkl._mkl_linear' not found in ...
+
+Author: Alexander Grund (TU Dresden)
+--- a/test/inductor/test_mkldnn_pattern_matcher.py
++++ b/test/inductor/test_mkldnn_pattern_matcher.py
+@@ -3157,6 +3157,7 @@ class TestPatternMatcher(TestPatternMatcherBase):
+         om(*example_inputs)
+         om(*example_inputs)
+
++    @unittest.skipIf(not TEST_MKL, "Test requires MKL")
+     @xfailIfACL
+     @torch._dynamo.config.patch("inline_inbuilt_nn_modules", True)
+     def test_reproduce_121253_issue_addmm_fusion_check(self):
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch
new file mode 100644
index 000000000000..2d277a433896
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch
@@ -0,0 +1,24 @@
+When no GPUs are available, test_checkpoint_wrapper_parity fails with
+> AttributeError: module 'torch.cpu' has no attribute 'reset_peak_memory_stats'
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/distributed/fsdp/test_checkpoint_wrapper.py b/test/distributed/fsdp/test_checkpoint_wrapper.py
+index 0f873b49297..afda0c13a6c 100644
+--- a/test/distributed/fsdp/test_checkpoint_wrapper.py
++++ b/test/distributed/fsdp/test_checkpoint_wrapper.py
+@@ -1,6 +1,7 @@
+ # Owner(s): ["oncall: distributed"]
+
+ import contextlib
++import unittest
+ from copy import deepcopy
+ from functools import partial
+
+@@ -132,6 +133,7 @@ class CheckpointWrapperTest(TestCase):
+         m(torch.randn(2, 1)).sum().backward()
+         self.assertEqual(2, count)
+
++    @unittest.skipIf(device_type.type == "cpu", "CPU does not support max_memory_allocated")
+     def test_checkpoint_wrapper_parity(self):
+         """
+         Tests that using checkpoint_wrapper or the functional
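For context on the failure being skipped above: peak-memory bookkeeping only exists for accelerator backends, so the parity test's reset call has no CPU counterpart. A quick check, assuming a CPU-only PyTorch 2.6 build with no CUDA device present:

```python
import torch

if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()  # fine on GPU builds
else:
    # This is what the unpatched test trips over:
    print(hasattr(torch.cpu, "reset_peak_memory_stats"))  # False
```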
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch
new file mode 100644
index 000000000000..3e11a13d56da
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch
@@ -0,0 +1,24 @@
+The test often times out and seems to be considered flaky by PyTorch:
+https://github.com/pytorch/pytorch/issues/78068
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+index 730b2c2c0ac..5f9b9545700 100644
+--- a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
++++ b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+@@ -7,6 +7,7 @@ import math
+ import pickle
+ import sys
+ from typing import List
++from unittest import skip
+
+ import torch
+ import torch.distributed as dist
+@@ -2426,6 +2427,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
+     @with_comms
+     @skip_if_lt_x_gpu(4)
+     @requires_nccl()
++    @skip("Times out often")
+     def test_init_from_local_shards(self):
+         local_shard_metadata = ShardMetadata(
+             shard_offsets=[(self.rank // 2) * 5, (self.rank % 2) * 5],
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch
new file mode 100644
index 000000000000..f553f0d8b201
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch
@@ -0,0 +1,48 @@
+test_jvp_linalg_det_singular_cpu_float32 in functorch/test_ops.py fails consistently with the error
+> Mismatched elements: 1 / 2 (50.0%)
+> Greatest absolute difference: 0.024928677827119827 at index (1,) (up to 1e-05 allowed)
+> Greatest relative difference: 1.0 at index (1,) (up to 1.3e-06 allowed)
+
+This is a known issue on x86 macOS, see https://github.com/pytorch/pytorch/issues/110980
+So just skip this test.
+
+The same applies to test_vmapjvpall_linalg_det_singular_cpu_float32 and test_forward_mode_AD_linalg_det_singular_cpu_float64,
+see https://github.com/pytorch/pytorch/issues/111583
+
+
+
+All det_singular tests are removed in 2.7: https://github.com/pytorch/pytorch/commit/3a3e2cf90a03fcf332a275f331fcb57e46d6c325
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py
+index a4269ff84d5..c0fad294489 100644
+--- a/test/functorch/test_ops.py
++++ b/test/functorch/test_ops.py
+@@ -589,7 +589,7 @@ class TestOperators(TestCase):
+             decorate(
+                 "linalg.det",
+                 "singular",
+-                decorator=expectedFailureIf(IS_MACOS and IS_X86),
++                decorator=unittest.skipIf(IS_X86, 'Known failure: #110980'),
+             ),
+         }
+     ),
+@@ -1350,7 +1350,7 @@
+             decorate(
+                 "linalg.det",
+                 "singular",
+-                decorator=expectedFailureIf(IS_MACOS and IS_X86),
++                decorator=expectedFailureIf(IS_X86),
+             ),
+         }
+     ),
+--- a/torch/testing/_internal/opinfo/definitions/linalg.py
++++ b/torch/testing/_internal/opinfo/definitions/linalg.py
+@@ -1233,7 +1233,6 @@ op_db: List[OpInfo] = [
+                 "test_forward_mode_AD",
+                 device_type="cpu",
+                 dtypes=(torch.float64,),
+-                active_if=IS_MACOS,
+             ),
+             # Both Hessians are incorrect on complex inputs??
+             DecorateInfo(
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch
new file mode 100644
index 000000000000..9a585ff53a77
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch
@@ -0,0 +1,16 @@
+The test is xfailed because it fails in some upstream CI configs, but it succeeds here, which is itself reported as a failure.
+Just disable it.
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/test/test_dataloader.py
++++ b/test/test_dataloader.py
+@@ -1388,7 +1388,7 @@ except RuntimeError as e:
+     # please don't forget to remove this skip when remove the xfailIfLinux.
+ @skipIfXpu + # https://github.com/pytorch/pytorch/issues/128551 +- @xfailIfLinux ++ @unittest.skip("unexpected success on e.g. AMD CPU") + def test_segfault(self): + p = ErrorTrackingProcess(target=_test_segfault) + p.start() diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch new file mode 100644 index 000000000000..3d985af079b9 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch @@ -0,0 +1,50 @@ +Those tests (from test_ao_sparsity & test_quantization) require FBGEMM which may not be available. +So add the skip decorator. +See https://github.com/pytorch/pytorch/issues/87364 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py +index 1156a7ecb8a..4239196c293 100644 +--- a/test/ao/sparsity/test_composability.py ++++ b/test/ao/sparsity/test_composability.py +@@ -14,6 +14,7 @@ from torch.ao.quantization.quantize_fx import ( + prepare_fx, + prepare_qat_fx, + ) ++from torch.testing._internal.common_quantization import skipIfNoFBGEMM + from torch.testing._internal.common_utils import TestCase, xfailIfS390X + + +@@ -71,6 +71,7 @@ def _calculate_sparsity(tensor): + # This series of tests are to check the composability goals for sparsity and quantization. Namely + # that performing quantization and sparsity model manipulations in various orderings + # does not cause problems ++@skipIfNoFBGEMM + class TestComposability(TestCase): + # This test checks whether performing quantization prepare before sparse prepare + # causes any issues and verifies that the correct observers are inserted and that +@@ -346,6 +347,7 @@ class TestFxComposability(TestCase): + """ + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_q_prep_fx_before_s_prep(self): + r""" + This test checks that the ordering of prepare_fx -> sparse prepare -> convert_fx +@@ -480,6 +482,7 @@ class TestFxComposability(TestCase): + self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"]) + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_s_prep_before_q_prep_fx(self): + r""" + This test checks that the ordering of sparse prepare -> prepare_fx -> convert_fx +@@ -532,6 +535,7 @@ class TestFxComposability(TestCase): + self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"]) + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_s_prep_before_qat_prep_fx(self): + r""" + This test checks that the ordering of sparse prepare -> prepare_qat_fx -> convert_fx diff --git a/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb new file mode 100644 index 000000000000..8a3c7154825a --- /dev/null +++ b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb @@ -0,0 +1,22 @@ +easyblock = 'PythonPackage' + +name = 'pytest-subtests' +version = '0.13.1' + +homepage = 'https://github.com/pytest-dev/pytest-subtests' +description = "unittest subTest() support and subtests fixture." + +toolchain = {'name': 'GCCcore', 'version': '13.2.0'} + +builddependencies = [ + ('binutils', '2.40'), +] +dependencies = [ + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), +] + +sources = [f'{name.replace("-", "_")}-%(version)s.tar.gz'] +checksums = ['989e38f0f1c01bc7c6b2e04db7d9fd859db35d77c2c1a430c831a70cbf3fde2d'] + +moduleclass = 'tools'
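For reference, the subtests fixture this package provides reports an independent pass/fail result per iteration of a loop inside a single test; a minimal usage sketch (hypothetical test file, not part of the easyconfig):

```python
# test_parity.py -- run with: pytest test_parity.py
def test_parity(subtests):
    for i in range(4):
        with subtests.test(msg="parity", i=i):
            assert i % 2 == 0  # each i is reported as its own sub-test
```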