diff --git a/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb b/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb new file mode 100644 index 000000000000..8b028268db66 --- /dev/null +++ b/easybuild/easyconfigs/p/PuLP/PuLP-2.9.0-foss-2023b.eb @@ -0,0 +1,26 @@ +easyblock = 'PythonPackage' + +name = 'PuLP' +version = '2.9.0' + +homepage = 'https://github.com/coin-or/pulp' +description = """ +PuLP is an LP modeler written in Python. PuLP can generate MPS or LP files and +call GLPK, COIN-OR CLP/CBC, CPLEX, GUROBI, MOSEK, XPRESS, CHOCO, MIPCL, SCIP to +solve linear problems. +""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +sources = [SOURCELOWER_TAR_GZ] +checksums = ['2e30e6c0ef2c0edac185220e3e53faca62eb786a9bd68465208f05bc63e850f3'] + +dependencies = [ + ('Python', '3.11.5'), + ('GLPK', '5.0'), + ('Cbc', '2.10.11'), + # Gurobi requires a separate license + # ('Gurobi', '9.5.0'), +] + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb new file mode 100644 index 000000000000..c0b30959612f --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0-foss-2023b.eb @@ -0,0 +1,214 @@ +name = 'PyTorch' +version = '2.6.0' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] +patches = [ + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-1.12.1_add-hypothesis-suppression.patch', + 'PyTorch-1.12.1_fix-TestTorch.test_to.patch', + 'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch', + 'PyTorch-1.13.1_skip-failing-singular-grad-test.patch', + 'PyTorch-2.0.1_avoid-test_quantization-failures.patch', + 'PyTorch-2.0.1_skip-failing-gradtest.patch', + 'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch', + 'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', + 'PyTorch-2.1.0_remove-test-requiring-online-access.patch', + 'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch', + 'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch', + 'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch', + 'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch', + 'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch', + 'PyTorch-2.6.0_add-checkfunctionexists-include.patch', + 'PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch', + 'PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch', + 'PyTorch-2.6.0_disable_tests_which_need_network_download.patch', + 'PyTorch-2.6.0_disable-gcc12-warnings.patch', + 'PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch', + 'PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch', + 'PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch', + 'PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch', + 'PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch', + 'PyTorch-2.6.0_fix-inductor-device-interface.patch', + 'PyTorch-2.6.0_fix-server-in-test_control_plane.patch', + 'PyTorch-2.6.0_fix-skip-decorators.patch', + 'PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch', + 'PyTorch-2.6.0_fix-test_linear_with_embedding.patch', + 'PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch', + 'PyTorch-2.6.0_fix-test_public_bindings.patch', +
'PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch', + 'PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch', + 'PyTorch-2.6.0_fix-xnnpack-float16-convert.patch', + 'PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch', + 'PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch', + 'PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch', + 'PyTorch-2.6.0_remove-test_slice_with_floordiv.patch', + 'PyTorch-2.6.0_skip-diff-test-on-ppc.patch', + 'PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch', + 'PyTorch-2.6.0_skip-test_init_from_local_shards.patch', + 'PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch', + 'PyTorch-2.6.0_skip-test-requiring-MKL.patch', + 'PyTorch-2.6.0_skip-test_segfault.patch', + 'PyTorch-2.6.0_skip-tests-without-fbgemm.patch', +] +checksums = [ + {'pytorch-v2.6.0.tar.gz': '3005690eb7b083c443a38c7657938af63902f524ad87a6c83f1aca38c77e3b57'}, + {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, + {'PyTorch-1.12.1_add-hypothesis-suppression.patch': + 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, + {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'}, + {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch': + '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'}, + {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch': + '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'}, + {'PyTorch-2.0.1_avoid-test_quantization-failures.patch': + '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'}, + {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'}, + {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch': + '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'}, + {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch': + '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, + {'PyTorch-2.1.0_remove-test-requiring-online-access.patch': + '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, + {'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch': + '6298daf9ddaa8542850eee9ea005f28594ab65b1f87af43d8aeca1579a8c4354'}, + {'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch': + 'fb96eefabf394617bbb3fbd3a7a7c1aa5991b3836edc2e5d2a30e708bfe49ba1'}, + {'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch': + '23416f2d9d5226695ec3fbea0671e3650c655c19deefd3f0f8ddab5afa50f485'}, + {'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch': + 'ee07d21c3ac7aeb0bd0e39507b18a417b9125284a529102929c4b5c6727c2976'}, + {'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch': + '9703fd0f1fca8916f6d79d83e9a7efe8e3f717362a5fdaa8f5d9da90d0c75018'}, + {'PyTorch-2.6.0_add-checkfunctionexists-include.patch': + '93579e35e946fb06025a50c42f3625ed8b8ac9f503a963cc23767e2c8869f0ea'}, + {'PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch': + '88d03d90359bc1fe3cfa3562624d4fbfd4c6654c9199c556ca912ac55289ce55'}, + {'PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch': + '74db866787f1e666ed3b35db5204f05a0ba8d989fb23057a72dd07928388dc46'}, + {'PyTorch-2.6.0_disable_tests_which_need_network_download.patch': + 'fe76129811e4eb24d0e12c397335a4c7971b0c4e48ce9cdb9169f3ef9de7aac4'}, + {'PyTorch-2.6.0_disable-gcc12-warnings.patch': '892643650788b743106ebe4e70c68be42a756eba797f0f79e31708d6e008a620'}, + 
{'PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch': + 'a6b1cfe8f03ad5b17437e04e6a0369a25fcc79eed939ce6912ceca1c0ab0f444'}, + {'PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch': + '011cffc098b6818eb160b6bec2e671dec46cb2a8457ce32144ea01cc9ed4290a'}, + {'PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch': + 'fd918fa510bf04c95f3bcc2f4abea417632a0fefb278154ec95207ca0d1719ed'}, + {'PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch': + '445472d43a61523b2ed169023f5f6db197bc2df8408f59e6254e55f5cb1d3a11'}, + {'PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch': + '79cf77a795e06c4c3206a998ce8f4a92072f79736803008ede65e5ec2f204bfc'}, + {'PyTorch-2.6.0_fix-inductor-device-interface.patch': + 'e8e6af1ea5f01568c23127d4f83aacb482ec9005ba558b68763748a581bcc5bc'}, + {'PyTorch-2.6.0_fix-server-in-test_control_plane.patch': + '1337689ff28ecaa8d1d0edf60d322bcdd7846fec040925325d357b19eb6e4342'}, + {'PyTorch-2.6.0_fix-skip-decorators.patch': 'ec1ba1ef2a2b2c6753a0b35d10c6af0457fc90fe98e2f77979745d9f79d79c86'}, + {'PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch': + '928c4b1dc16f3d4a7bec29d8749b89ebd41488845938e2514c7fa8c048950e33'}, + {'PyTorch-2.6.0_fix-test_linear_with_embedding.patch': + '56c053de7cfaa2f9898c3b036a185b499f5d44a7b4cd0442c45a8c94928322bf'}, + {'PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch': + '8cf9e5d434eb8d3b81400622ca23714c7002a0b835e7e08b384b84408c7ed085'}, + {'PyTorch-2.6.0_fix-test_public_bindings.patch': + '066d88acd8156ed3f91b6a8e924de57f8aef944aa1bf67dc453b830ee1c26094'}, + {'PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch': + '5f5ce1e275888cd6a057a0769fffaa9e49dde003ba191fd70b0265d8c6259a9b'}, + {'PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch': + '82ce0b48e3b7c3dfd3a2ba915f4675d5c3a6d149646e1e0d6a29eedbbaecc8bd'}, + {'PyTorch-2.6.0_fix-xnnpack-float16-convert.patch': + 'a6fcb475040c6fed2c0ec8b3f9c1e9fb964220413e84c8f2ee4092770ee6ac7d'}, + {'PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch': + 'c1c6ea41504e4479d258225ecefc7e9c5726934601610904ae555501a11e9109'}, + {'PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch': + '9850facdfb5d98451249570788217ede07466cae9ba52cd03afd3ec803ba33c9'}, + {'PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch': + '8d5eb53bb0a1456af333ae646c860033d6dd037bd9152601a200ca5c10ebf3cb'}, + {'PyTorch-2.6.0_remove-test_slice_with_floordiv.patch': + '1b7ff59a595b9ebbc042d8ff53e3f6c72a1d3b04fb82228f4433473f28623f9b'}, + {'PyTorch-2.6.0_skip-diff-test-on-ppc.patch': '6f2f87cad1b0ab8c5a0c7b3f7fbc14e4bdfbe61da26a3934ded9dda7fe368c74'}, + {'PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch': + '600f74de167b6fea4d849229de6d653dc616093b456962729222d6bfa767a8e8'}, + {'PyTorch-2.6.0_skip-test_init_from_local_shards.patch': + '222383195f6a3b7c545ffeadb4dd469b9f3361b42c0866de3d3f0f91f8fbe777'}, + {'PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch': + '84abe4769005bbb794852a1b3de370977c9efa351bac4b4775831a02bb18d898'}, + {'PyTorch-2.6.0_skip-test-requiring-MKL.patch': + 'f1c9b1c77b09d59317fd52d390e7d948a147325b927ad6373c1fa1d1d6ea1ea8'}, + {'PyTorch-2.6.0_skip-test_segfault.patch': '26806bd62e6b61b56ebaa52d68ca44c415a28124f684bd2fb373557ada68ef52'}, + {'PyTorch-2.6.0_skip-tests-without-fbgemm.patch': + 'ed35099de94a14322a879066da048ec9bc565dc81287b4adc4fec46f9afe90cf'}, +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.27.6'), + ('hypothesis', '6.90.0'), + # For tests + 
('parameterized', '0.9.0'), + ('pytest-flakefinder', '1.1.0'), + ('pytest-rerunfailures', '14.0'), + ('pytest-shard', '0.1.2'), + ('pytest-subtests', '0.13.1'), + ('tlparse', '0.3.5'), + ('optree', '0.13.0'), + ('unittest-xml-reporting', '3.1.0'), +] + +dependencies = [ + ('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), + ('protobuf', '25.3'), + ('protobuf-python', '4.25.3'), + ('PuLP', '2.9.0'), + ('pybind11', '2.11.1'), + ('SciPy-bundle', '2023.11'), + ('PyYAML', '6.0.1'), + ('MPFR', '4.2.1'), + ('GMP', '6.3.0'), + ('numactl', '2.0.16'), + ('FFmpeg', '6.0'), + ('Pillow', '10.2.0'), + ('expecttest', '0.2.1'), + ('networkx', '3.2.1'), + ('typing-extensions', '4.10.0'), + ('sympy', '1.12'), + ('Z3', '4.13.0',), +] + +buildcmd = '%(python)s setup.py build' # Run the (long) build in the build step + +excluded_tests = { + '': [ + # This test seems to take too long on NVIDIA Ampere at least. + 'distributed/test_distributed_spawn', + # no xdoctest + 'doctests', + # intermittent failures on various systems + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'distributed/rpc/test_tensorpipe_agent', + # This test is expected to fail when run in their CI, but won't in our case. + # It just checks for a "CI" env variable + 'test_ci_sanity_check_fail', + ] +} + +local_test_opts = '--continue-through-error --pipe-logs --verbose %(excluded_tests)s' +runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py ' + local_test_opts + +# Especially test_quantization has a few corner cases that are triggered by the random input values, +# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030 +# So allow a low number of tests to fail as the tests "usually" succeed +max_failed_tests = 6 + +tests = ['PyTorch-check-cpp-extension.py'] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch new file mode 100644 index 000000000000..73a90f777ee5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_add-checkfunctionexists-include.patch @@ -0,0 +1,25 @@ +From 8d91bfd9654589c41b3bbb589bcb0bf95443c53e Mon Sep 17 00:00:00 2001 +From: Nikita Shulga +Date: Tue, 28 Jan 2025 08:40:31 -0800 +Subject: [PATCH] [BE] Include CheckFunctionExists in `FindBLAS.cmake` + (#145849) + +It's used in the script, so it must be included +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145849 +Approved by: https://github.com/Skylion007 +--- + cmake/Modules/FindBLAS.cmake | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/cmake/Modules/FindBLAS.cmake b/cmake/Modules/FindBLAS.cmake +index 5ce875f529206..8e54eedb2aa8f 100644 +--- a/cmake/Modules/FindBLAS.cmake ++++ b/cmake/Modules/FindBLAS.cmake +@@ -25,6 +25,7 @@ SET(WITH_BLAS "" CACHE STRING "Blas type [accelerate/acml/atlas/blis/generic/got + # Old FindBlas + INCLUDE(CheckCSourceRuns) + INCLUDE(CheckFortranFunctionExists) ++INCLUDE(CheckFunctionExists) + + MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list) + # This macro checks for the existence of the combination of fortran libraries diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch new file mode 100644 index 000000000000..7d72fa1a1e14 --- /dev/null +++ 
b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_avoid_caffe2_test_cpp_jit.patch @@ -0,0 +1,66 @@ +Avoid tripping on //caffe2/test/cpp/jit:test_custom_class_registrations with IS_SANDCASTLE + +Author: Ake Sandgren +Update for 2.6: Alexander Grund (TU Dresden) + +diff --git a/test/export/test_export.py b/test/export/test_export.py +index 703a0c6e918..91892503955 100755 +--- a/test/export/test_export.py ++++ b/test/export/test_export.py +@@ -11324,7 +11324,7 @@ class TestExportCustomClass(TorchTestCase): + def setUp(self): + if IS_FBCODE: + lib_file_path = "//caffe2/test/cpp/jit:test_custom_class_registrations" +- elif IS_SANDCASTLE or IS_MACOS: ++ elif False or IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") + elif IS_WINDOWS: + lib_file_path = find_library_location("torchbind_test.dll") +diff --git a/test/export/test_lift_unlift.py b/test/export/test_lift_unlift.py +index c027fc55717..17358101b8c 100644 +--- a/test/export/test_lift_unlift.py ++++ b/test/export/test_lift_unlift.py +@@ -147,7 +147,7 @@ class TestLift(TestCase): + def setUp(self): + if IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") +- elif IS_SANDCASTLE or IS_FBCODE: ++ elif False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +@@ -380,7 +380,7 @@ class ConstantAttrMapTest(TestCase): + def setUp(self): + if IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") +- elif IS_SANDCASTLE or IS_FBCODE: ++ elif False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +diff --git a/test/test_weak.py b/test/test_weak.py +index e8b6ee6f556..a6b3f0e052d 100644 +--- a/test/test_weak.py ++++ b/test/test_weak.py +@@ -593,7 +593,7 @@ class WeakKeyDictionaryScriptObjectTestCase(TestCase): + + def __init__(self, *args, **kw): + unittest.TestCase.__init__(self, *args, **kw) +- if IS_SANDCASTLE or IS_FBCODE: ++ if False or IS_FBCODE: + torch.ops.load_library( + "//caffe2/test/cpp/jit:test_custom_class_registrations" + ) +diff --git a/torch/testing/_internal/torchbind_impls.py b/torch/testing/_internal/torchbind_impls.py +index 5566b241f56..63159276572 100644 +--- a/torch/testing/_internal/torchbind_impls.py ++++ b/torch/testing/_internal/torchbind_impls.py +@@ -113,7 +113,7 @@ def load_torchbind_test_lib(): + IS_WINDOWS, + ) + +- if IS_SANDCASTLE or IS_FBCODE: ++ if False or IS_FBCODE: + torch.ops.load_library("//caffe2/test/cpp/jit:test_custom_class_registrations") + elif IS_MACOS: + raise unittest.SkipTest("non-portable load_library call used in test") diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch new file mode 100644 index 000000000000..6f1f2bd578f5 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable-gcc12-warnings.patch @@ -0,0 +1,20 @@ +GCC 12 emits a false-positive warning when compiling for some architectures, e.g. Intel Sapphire Rapids. +See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370 + +Suppress this warning so that the build doesn't error. +Also disable another false-positive warning that produces a lot of output.
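+Note: append_cxx_flag_if_supported only adds a flag after a successful check_cxx_compiler_flag probe, so compilers that do not recognise these -Wno-* options should be unaffected.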
+ +Author: Alexander Grund (TU Dresden) +diff --git a/CMakeLists.txt b/CMakeLists.txt +index b74bf4536f4..bb062fa843a 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -653,6 +653,8 @@ if(MSVC) + string(APPEND CMAKE_CXX_FLAGS " /FS") + string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /FS") + endif(MSVC) ++append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS) ++append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS) + + string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch new file mode 100644 index 000000000000..fd39fbcb1e59 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch @@ -0,0 +1,127 @@ +Disable use of DataType in test when tensorboard module is not available +The corresponding import is already protected + +Updated: Alexander Grund (TU Dresden) + +diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py +index 24f2687c7dc..141d5941ff6 100644 +--- a/test/test_tensorboard.py ++++ b/test/test_tensorboard.py +@@ -870,64 +870,65 @@ class TestTensorBoardNumpy(BaseTestCase): + res = make_np({"pytorch": 1.0}) + + +-class TestTensorProtoSummary(BaseTestCase): +- @parametrize( +- "tensor_type,proto_type", +- [ +- (torch.float16, DataType.DT_HALF), +- (torch.bfloat16, DataType.DT_BFLOAT16), +- ], +- ) +- @skipIfTorchDynamo("Unsuitable test for Dynamo, behavior changes with version") +- def test_half_tensor_proto(self, tensor_type, proto_type): +- float_values = [1.0, 2.0, 3.0] +- actual_proto = ( +- tensor_proto( +- "dummy", +- torch.tensor(float_values, dtype=tensor_type), +- ) +- .value[0] +- .tensor +- ) +- self.assertSequenceEqual( +- [int_to_half(x) for x in actual_proto.half_val], +- float_values, +- ) +- self.assertTrue(actual_proto.dtype == proto_type) +- +- def test_float_tensor_proto(self): +- float_values = [1.0, 2.0, 3.0] +- actual_proto = tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor +- self.assertEqual(actual_proto.float_val, float_values) +- self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT) +- +- def test_int_tensor_proto(self): +- int_values = [1, 2, 3] +- actual_proto = ( +- tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32)) +- .value[0] +- .tensor ++if TEST_TENSORBOARD: ++ class TestTensorProtoSummary(BaseTestCase): ++ @parametrize( ++ "tensor_type,proto_type", ++ [ ++ (torch.float16, DataType.DT_HALF), ++ (torch.bfloat16, DataType.DT_BFLOAT16), ++ ], + ) +- self.assertEqual(actual_proto.int_val, int_values) +- self.assertTrue(actual_proto.dtype == DataType.DT_INT32) ++ @skipIfTorchDynamo("Unsuitable test for Dynamo, behavior changes with version") ++ def test_half_tensor_proto(self, tensor_type, proto_type): ++ float_values = [1.0, 2.0, 3.0] ++ actual_proto = ( ++ tensor_proto( ++ "dummy", ++ torch.tensor(float_values, dtype=tensor_type), ++ ) ++ .value[0] ++ .tensor ++ ) ++ self.assertSequenceEqual( ++ [int_to_half(x) for x in actual_proto.half_val], ++ float_values, ++ ) ++ self.assertTrue(actual_proto.dtype == proto_type) ++ ++ def test_float_tensor_proto(self): ++ float_values = [1.0, 2.0, 3.0] ++ actual_proto = tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor ++ self.assertEqual(actual_proto.float_val, float_values) ++ 
self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT) ++ ++ def test_int_tensor_proto(self): ++ int_values = [1, 2, 3] ++ actual_proto = ( ++ tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32)) ++ .value[0] ++ .tensor ++ ) ++ self.assertEqual(actual_proto.int_val, int_values) ++ self.assertTrue(actual_proto.dtype == DataType.DT_INT32) + +- def test_scalar_tensor_proto(self): +- scalar_value = 0.1 +- actual_proto = tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor +- self.assertAlmostEqual(actual_proto.float_val[0], scalar_value) ++ def test_scalar_tensor_proto(self): ++ scalar_value = 0.1 ++ actual_proto = tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor ++ self.assertAlmostEqual(actual_proto.float_val[0], scalar_value) + +- def test_complex_tensor_proto(self): +- real = torch.tensor([1.0, 2.0]) +- imag = torch.tensor([3.0, 4.0]) +- actual_proto = tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor +- self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0]) ++ def test_complex_tensor_proto(self): ++ real = torch.tensor([1.0, 2.0]) ++ imag = torch.tensor([3.0, 4.0]) ++ actual_proto = tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor ++ self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0]) + +- def test_empty_tensor_proto(self): +- actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor +- self.assertEqual(actual_proto.float_val, []) ++ def test_empty_tensor_proto(self): ++ actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor ++ self.assertEqual(actual_proto.float_val, []) + + +-instantiate_parametrized_tests(TestTensorProtoSummary) ++ instantiate_parametrized_tests(TestTensorProtoSummary) + + if __name__ == "__main__": + run_tests() diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch new file mode 100644 index 000000000000..e962c595d5c3 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_disable_tests_which_need_network_download.patch @@ -0,0 +1,28 @@ +Disable tests that require external downloads + +Åke Sandgren +Updated: Alexander Grund (TU Dresden) +diff --git a/test/test_hub.py b/test/test_hub.py +index 1447b3dc4a7..bf0b0d929ea 100644 +--- a/test/test_hub.py ++++ b/test/test_hub.py +@@ -25,6 +25,7 @@ TORCHHUB_EXAMPLE_RELEASE_URL = ( + + + @unittest.skipIf(IS_SANDCASTLE, "Sandcastle cannot ping external") ++@unittest.skip('EasyBuild: do not want tests to depend on downloading') + class TestHub(TestCase): + def setUp(self): + super().setUp() +diff --git a/test/test_nn.py b/test/test_nn.py +index 0af76d427e2..68f661e7a1d 100644 +--- a/test/test_nn.py ++++ b/test/test_nn.py +@@ -152,6 +152,7 @@ class TestNN(NNTestCase): + for b in m.buffers(): + self.assertFalse(b.requires_grad) + ++ @unittest.skip('EasyBuild: do not want tests to depend on downloading') + def test_module_backcompat(self): + from torch.serialization import SourceChangeWarning + path = download_file('https://download.pytorch.org/test_data/linear.pt') diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch new file mode 100644 index 000000000000..5f3a97e74ab8 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-ExcTests.test_trigger_on_error.patch @@ -0,0 +1,33 @@ +Fix a failure in
dynamo/test_exc.py that shows this diff in the expected output. +This was seemingly fixed manually in a merge commit in https://github.com/pytorch/pytorch/pull/143926 so there is no more specific commit. + +Author: Alexander Grund (TU Dresden) +--- a/test/dynamo/test_exc.py ++++ b/test/dynamo/test_exc.py +@@ -257,18 +257,13 @@ def fn(x, shape): + ==> (== L['shape'][2] s3) + ==> (== L['x'].size()[0] s0) + ==> (> s0 1) +- ==> (True) + + Target Expressions: + ==> (!= (+ s1 s2 s3) s0) +- ==> (<= (+ s1 s2 s3) s0) +- ==> (<= (+ s1 s2) (+ s0 (* -1 s3))) +- ==> (<= (+ s1 s2) s0) + ==> (<= 0 s1) + ==> (<= 0 s2) + ==> (<= 0 s3) + ==> (<= 2 s0) +- ==> (<= s1 (+ s0 (* -1 s2))) + ==> (== 0 L['x'].storage_offset()) + ==> (== 1 L['x'].stride()[0]) + ==> (== L['shape'][0] s1) +@@ -277,7 +272,6 @@ def fn(x, shape): + ==> (== L['x'].size()[0] s0) + ==> (> s0 0) + ==> (>= 0 s1) +- ==> (And (<= (+ s1 s2) s0) (<= (* -1 s0) (+ s1 s2))) + + Failed Source Expressions: + ==> (== (+ L['shape'][0] L['shape'][1] L['shape'][2]) L['x'].size()[0])""", diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch new file mode 100644 index 000000000000..471e3deb40ba --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-accuracy-issues-in-linalg_solve.patch @@ -0,0 +1,127 @@ +An intended optimization in linalg.solve that no longer applies leads to failures in some tests in test_ops.py +due to small discrepancies depending on the memory layout of the input tensors. + +test_vmapvjp_linalg_tensorsolve_cpu_float32 fails with: +> Mismatched elements: 222 / 288 (77.1%) +> Greatest absolute difference: 0.13232421875 at index (0, 0, 3, 1, 1) (up to 0.0001 allowed) +> Greatest relative difference: 0.00012596177111845464 at index (0, 1, 3, 2, 0) (up to 0.0001 allowed) + +test_vmapvjpvjp_linalg_tensorsolve_cpu_float32 fails with: +> Mismatched elements: 6 / 288 (2.1%) +> Greatest absolute difference: 0.0005550384521484375 at index (0, 2, 1, 0, 2) (up to 0.0001 allowed) +> Greatest relative difference: 0.0002498962276149541 at index (0, 1, 0, 0, 2) (up to 0.0001 allowed) + +See https://github.com/pytorch/pytorch/issues/151440 + +Author: Alexander Grund (TU Dresden) + +diff --git a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp +index ec5969d32c0..4dc32570c8c 100644 +--- a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp ++++ b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp +@@ -382,14 +382,6 @@ fourOutputs solve_ex_batch_rule( + A_ = ensure_has_bdim(A_, A_bdim.has_value(), batch_size); + B_ = ensure_has_bdim(B_, B_bdim.has_value(), batch_size); + +- // NOTE [ solve_ex Batch Rule Contiguity ] +- // A determines whether or not linalg_solve takes an optimized path. We need the check on A_ to match the one run on +- // A as BatchedTensor since it might have been saved by autograd (specifically by the jvp) and the autograd behvaior +- // differs based on whether or not the optimized path was taken +- const auto batched_A_was_contiguous = A_bdim.has_value() ? 
at::select(A, *A_bdim, 0).is_contiguous() : A.is_contiguous(); +- if (batched_A_was_contiguous && !A.is_complex()) { +- A_ = A_.contiguous(); +- } + auto res = _linalg_solve_ex(A_, B_, left, check_errors); + return std::make_tuple(std::move(std::get<0>(res)), 0, std::move(std::get<1>(res)), 0, std::move(std::get<2>(res)), 0, std::move(std::get<3>(res)), 0); + } +diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp +index 02b5d76892e..82c90d6fe41 100644 +--- a/aten/src/ATen/native/BatchLinearAlgebra.cpp ++++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp +@@ -1946,15 +1946,10 @@ TORCH_IMPL_FUNC(_linalg_solve_ex_out)(const Tensor& A, + const Tensor& LU, + const Tensor& pivots, + const Tensor& info) { +- // Possible optimization: Compute the LU factorization of A^T if A is contiguous +- // Then we solve A^T X = B with adjoint=True +- // This saves a copy as A doesn't need to be copied into an F-contig matrix in lu_factor +- // This optimization makes functorch's batching rule difficult. See NOTE [ solve_ex Batch Rule Contiguity ] +- const bool use_A_T = A.is_contiguous() && !A.is_complex(); + at::linalg_lu_factor_ex_out(const_cast(LU), + const_cast(pivots), + const_cast(info), +- use_A_T ? A.mT() : A); ++ A); + if (check_errors) { + at::_linalg_check_errors(info, "torch.linalg.solve_ex", A.dim() == 2); + } +@@ -1963,7 +1958,7 @@ TORCH_IMPL_FUNC(_linalg_solve_ex_out)(const Tensor& A, + const bool vector_case = at::native::linalg_solve_is_vector_rhs(LU, B); + auto result_ = vector_case ? result.unsqueeze(-1) : result; + auto B_ = vector_case ? B.unsqueeze(-1) : B; +- at::linalg_lu_solve_out(result_, LU, pivots, B_, left, /*adjoint*/use_A_T); ++ at::linalg_lu_solve_out(result_, LU, pivots, B_, left); + } + + std::tuple linalg_solve_ex_out(const Tensor& A, +diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml +index fa77b906b1b..9493a92e933 100644 +--- a/tools/autograd/derivatives.yaml ++++ b/tools/autograd/derivatives.yaml +@@ -1576,7 +1576,7 @@ + + - name: _linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor LU, Tensor pivots, Tensor info) + A, B: linalg_solve_backward(grad, result, A, LU, pivots, left, grad_input_mask[1]) +- result: "linalg_solve_jvp(A_t, B_t, result, LU, pivots, left, A_p.is_contiguous() && !A_p.is_complex())" ++ result: "linalg_solve_jvp(A_t, B_t, result, LU, pivots, left)" + output_differentiability: [True, False, False, False] # LU is an auxiliary tensor not exposed to the user + + - name: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +diff --git a/torch/csrc/autograd/FunctionsManual.cpp b/torch/csrc/autograd/FunctionsManual.cpp +index f231373ff65..d683c37987e 100644 +--- a/torch/csrc/autograd/FunctionsManual.cpp ++++ b/torch/csrc/autograd/FunctionsManual.cpp +@@ -5823,8 +5823,7 @@ Tensor linalg_solve_jvp( + const Tensor& X, + const Tensor& LU, + const Tensor& pivots, +- const bool left, +- const bool use_A_T) { ++ const bool left) { + at::NoTF32Guard disable_tf32; + // For left=True (left=False is analogous) + // dX = A^{-1}(dB - dAX) +@@ -5847,7 +5846,7 @@ Tensor linalg_solve_jvp( + auto dB_ = vector_to_matrix(dB); + auto R_ = left ? 
dA.matmul(X_) : X_.matmul(dA); + auto dX_ = +- at::linalg_lu_solve(LU, pivots, dB_ - R_, left, /*adjoint*/ use_A_T); ++ at::linalg_lu_solve(LU, pivots, dB_ - R_, left); + return matrix_to_vector(dX_); + } + +@@ -5885,9 +5884,8 @@ std::tuple linalg_solve_backward( + if (at::GradMode::is_enabled()) { + gB_ = at::linalg_solve(A.mH(), vector_to_matrix(gX), left); + } else { +- const auto use_A_T = A.is_contiguous() && !A.is_complex(); + gB_ = at::linalg_lu_solve( +- LU, pivots, vector_to_matrix(gX), left, /*adjoint*/ !use_A_T); ++ LU, pivots, vector_to_matrix(gX), left, /*adjoint*/ true); + } + + Tensor gA_; +diff --git a/torch/csrc/autograd/FunctionsManual.h b/torch/csrc/autograd/FunctionsManual.h +index 4f9fe796947..124ec87509f 100644 +--- a/torch/csrc/autograd/FunctionsManual.h ++++ b/torch/csrc/autograd/FunctionsManual.h +@@ -866,8 +866,7 @@ Tensor linalg_solve_jvp( + const Tensor& X, + const Tensor& LU, + const Tensor& pivots, +- const bool left, +- const bool use_A_T); ++ const bool left); + Tensor lu_unpack_backward( + const Tensor& L_grad, + const Tensor& U_grad, diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch new file mode 100644 index 000000000000..65400fa56732 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-distributed-tests-without-gpus.patch @@ -0,0 +1,37 @@ +If there are no GPUs there would be a WORLD_SIZE=0 which doesn't work. +Use a positive number for the NCCL/GLOO tests in that case. + +See https://github.com/pytorch/pytorch/pull/150764 + +Author: Alexander Grund (TU Dresden) +diff --git a/test/run_test.py b/test/run_test.py +index a508d8db4d2..e7bbe6ea086 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -610,21 +610,22 @@ DISTRIBUTED_TESTS_CONFIG = {} + + + if dist.is_available(): ++ num_gpus = torch.cuda.device_count() + DISTRIBUTED_TESTS_CONFIG["test"] = {"WORLD_SIZE": "1"} + if not TEST_WITH_ROCM and dist.is_mpi_available(): + DISTRIBUTED_TESTS_CONFIG["mpi"] = { + "WORLD_SIZE": "3", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-mpi", + } +- if dist.is_nccl_available(): ++ if dist.is_nccl_available() and num_gpus > 0: + DISTRIBUTED_TESTS_CONFIG["nccl"] = { +- "WORLD_SIZE": f"{torch.cuda.device_count()}", ++ "WORLD_SIZE": f"{num_gpus}", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-nccl", + } +- if dist.is_gloo_available(): ++ if dist.is_gloo_available() and num_gpus > 0: + DISTRIBUTED_TESTS_CONFIG["gloo"] = { + # TODO: retire testing gloo with CUDA +- "WORLD_SIZE": f"{torch.cuda.device_count()}", ++ "WORLD_SIZE": f"{num_gpus}", + "TEST_REPORT_SOURCE_OVERRIDE": "dist-gloo", + } + # Test with UCC backend is deprecated. 
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch new file mode 100644 index 000000000000..fc488f80f79b --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-edge-case-causing-test_trigger_bisect_on_error-failure.patch @@ -0,0 +1,34 @@ +This fixes a failure in test_trigger_bisect_on_error +> torch/fx/experimental/validator.py", line 838 +> assert left in exception and isinstance(exception[left], ValidationException) +> ^^^^^^^^^^^^^^^^^ +> AssertionError: + +From a86fa779ce3482324a0d1fbb12d87a95a981f0a3 Mon Sep 17 00:00:00 2001 +From: Ryan Guo +Date: Wed, 22 Jan 2025 14:23:11 -0800 +Subject: [PATCH] [BE] Fix edge case in translation validation bisector + (#145414) + +This patch fixes a small bug for the binary-search algorithm in +translation validation bisector. Fixes #131303. + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145414 + +index 61a51b977311d..17a814b233c63 100644 +--- a/torch/fx/experimental/validator.py ++++ b/torch/fx/experimental/validator.py +@@ -819,7 +819,13 @@ def check_node_fails(node: torch.fx.Node) -> Optional[ValidationException]: + ] + + # Preparing the indices for binary search. ++ # The overall invariants are ++ # - for all i < left, assert_node[i] doesn't fail ++ # - for all i >= right, assert_node[i] fails ++ # - `right in exception` always holds ++ # - `left <= right` always holds + left, mid, right = 0, 0, len(assert_nodes) - 1 ++ exception[right] = check_node_fails(assert_nodes[right]) + + while left < right: + mid = (left + right) // 2 diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch new file mode 100644 index 000000000000..ebe291d2f0bc --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-flaky-test_aot_export_with_torch_cond.patch @@ -0,0 +1,22 @@ +Fix random failures of test_aot_export_with_torch_cond +> - cond = torch.ops.higher_order.cond(gt, true_graph_0, false_graph_0, [arg0_1]); gt = true_graph_0 = false_graph_0 = arg0_1 = None +> + cond = torch.ops.higher_order.cond(gt, true_graph_0, false_graph_0, [arg0_1, 3, 4]); gt = true_graph_0 = false_graph_0 = arg0_1 = None +> ? 
++++++ + +Trivial backport of https://github.com/pytorch/pytorch/pull/145330 + +Author: Alexander Grund (TU Dresden) + +--- a/test/functorch/test_aotdispatch.py ++++ b/test/functorch/test_aotdispatch.py +@@ -3973,6 +3972,10 @@ def forward(self, *args): + + + class TestAOTExport(AOTTestCase): ++ def setUp(self): ++ super().setUp() ++ torch._dynamo.reset() ++ + def test_aot_export_ban_dropout_mut_pre_dispatch(self): + def fn(p, x): + y = torch.ops.aten.dropout.default(x, 0.1, train=False) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch new file mode 100644 index 000000000000..19423c7e84d6 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-inductor-device-interface.patch @@ -0,0 +1,25 @@ +test_triton_extension_backend.py shows an error: +> torch._dynamo.exc.BackendCompilerFailed: backend='inductor' raised: +> LoweringException: NotImplementedError: +> target: aten.min.default + +This is due to changes in other parts of the code and this method should be removed. +See also https://github.com/pytorch/pytorch/pull/144399 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/inductor/extension_backends/triton/device_interface.py b/test/inductor/extension_backends/triton/device_interface.py +index 9ca96e71a7d..14b3ca5436e 100644 +--- a/test/inductor/extension_backends/triton/device_interface.py ++++ b/test/inductor/extension_backends/triton/device_interface.py +@@ -108,10 +108,6 @@ class DeviceInterface(device_interface.DeviceInterface): + def synchronize(device) -> None: + pass + +- @staticmethod +- def get_device_properties(device) -> DeviceProperties: +- raise NotImplementedError +- + # Can be mock patched by @patch decorator. + @staticmethod + def is_available() -> bool: diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch new file mode 100644 index 000000000000..84a711b25574 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-server-in-test_control_plane.patch @@ -0,0 +1,15 @@ +Using "" may cause a "connection refused" error in some environments. +Using the localhost IP is more reliable. +diff --git a/test/distributed/elastic/test_control_plane.py b/test/distributed/elastic/test_control_plane.py +index ede4e352b04..c34bd4b1919 100644 +--- a/test/distributed/elastic/test_control_plane.py ++++ b/test/distributed/elastic/test_control_plane.py +@@ -157,7 +157,7 @@ class WorkerServerTest(TestCase): + + from torch._C._distributed_c10d import _WorkerServer + +- server = _WorkerServer("", 1234) ++ server = _WorkerServer("127.0.0.1", 1234) + out = requests.get("http://localhost:1234/handler/") + self.assertEqual(out.status_code, 200) + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch new file mode 100644 index 000000000000..2e6fd803a3cb --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-skip-decorators.patch @@ -0,0 +1,125 @@ +The decorators are implemented to run only when the test function itself is called, +which is after the test `setUp` method has already spawned subprocesses; those +subprocesses may use NCCL to sync and fail when there are not enough GPUs available. +So replace the custom code by calls to the `unittest` skip decorators, as sketched below.
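+For illustration, this is the pattern being removed (a minimal sketch mirroring the
+code deleted below):
+
+    def skip_if_lt_x_gpu(x):
+        def decorator(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                # Only runs once the test body is entered, i.e. after setUp()
+                # has already spawned the worker processes.
+                if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                    return func(*args, **kwargs)
+                sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+            return wrapper
+        return decorator
+
+unittest's skip decorators instead mark the test as skipped up front, so the runner
+never calls setUp() for it.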
+See https://github.com/pytorch/pytorch/pull/109491 + +Author: Alexander Grund (TU Dresden) +diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py +index d34b1ffdb0a..8f9628f209b 100644 +--- a/torch/testing/_internal/common_distributed.py ++++ b/torch/testing/_internal/common_distributed.py +@@ -155,17 +155,7 @@ def skip_if_odd_worldsize(func): + + + def require_n_gpus_for_nccl_backend(n, backend): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if backend == "nccl" and torch.cuda.device_count() < n: +- sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code) +- else: +- return func(*args, **kwargs) +- +- return wrapper +- +- return decorator ++ return unittest.skipUnless(at_least_x_gpu(n)) if backend == "nccl" else unittest.skipIf(False, None) + + + def import_transformers_or_skip(): +@@ -192,34 +182,9 @@ def at_least_x_gpu(x): + + + def skip_if_lt_x_gpu(x): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if torch.cuda.is_available() and torch.cuda.device_count() >= x: +- return func(*args, **kwargs) +- if TEST_HPU and torch.hpu.device_count() >= x: +- return func(*args, **kwargs) +- sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) +- +- return wrapper +- +- return decorator +- +- +-# This decorator helps avoiding initializing cuda while testing other backends +-def nccl_skip_if_lt_x_gpu(backend, x): +- def decorator(func): +- @wraps(func) +- def wrapper(*args, **kwargs): +- if backend != "nccl": +- return func(*args, **kwargs) +- if torch.cuda.is_available() and torch.cuda.device_count() >= x: +- return func(*args, **kwargs) +- sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) +- +- return wrapper +- +- return decorator ++ return unittest.skipUnless(torch.cuda.device_count() >= x or ( ++ TEST_HPU and torch.hpu.device_count() >= x), ++ TEST_SKIPS[f"multi-gpu-{x}"].message) + + + def verify_ddp_error_logged(model_DDP, err_substr): +diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py +index a4d6d53b975..0da1d9baddf 100644 +--- a/torch/testing/_internal/distributed/distributed_test.py ++++ b/torch/testing/_internal/distributed/distributed_test.py +@@ -66,7 +66,6 @@ from torch.testing._internal.common_distributed import ( + skip_if_small_worldsize, + skip_if_odd_worldsize, + skip_if_lt_x_gpu, +- nccl_skip_if_lt_x_gpu, + skip_if_no_gpu, + require_n_gpus_for_nccl_backend, + requires_nccl_version, +@@ -5299,7 +5298,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync(self): + """ + Runs _test_accumulate_gradients_no_sync using default inputs +@@ -5310,7 +5309,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync_grad_is_view(self): + """ + Runs _test_accumulate_gradients_no_sync using default inputs +@@ -5321,7 +5320,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def 
test_accumulate_gradients_no_sync_allreduce_hook(self): + """ + Runs multiple iterations on _test_accumulate_gradients_no_sync +@@ -5349,7 +5348,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_accumulate_gradients_no_sync_allreduce_with_then_hook(self): + """ + Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce +@@ -5383,7 +5382,7 @@ class DistributedTest: + BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", + "get_future is only supported on mpi, nccl and gloo", + ) +- @nccl_skip_if_lt_x_gpu(BACKEND, 2) ++ @require_n_gpus_for_nccl_backend(2, BACKEND) + def test_get_future(self): + def mult(fut): + return [t * 3 for t in fut.wait()] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch new file mode 100644 index 000000000000..4618da11c9b4 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-sympy-1.13-compat.patch @@ -0,0 +1,24 @@ +There is an assertion failure with sympy 1.13.3: +> File "torch/fx/experimental/symbolic_shapes.py", line 6403, in _evaluate_expr +> assert orig_expr == hint, f"{orig_expr} != {hint}" +> ^^^^^^^^^^^^^^^^^ +> AssertionError: 0 != 0.0 + +Backport of https://github.com/pytorch/pytorch/pull/147197 + +Author: Alexander Grund (TU Dresden) + +--- a/torch/fx/experimental/symbolic_shapes.py ++++ b/torch/fx/experimental/symbolic_shapes.py +@@ -6400,7 +6400,10 @@ class ShapeEnv: + if orig_expr.is_number: + self.log.debug("eval %s [trivial]", orig_expr) + if hint is not None: +- assert orig_expr == hint, f"{orig_expr} != {hint}" ++ if isinstance(hint, bool): ++ assert orig_expr == hint, f"{orig_expr} != {hint}" ++ else: ++ assert sympy.Eq(orig_expr, hint), f"{orig_expr} != {hint}" + return orig_expr + + expr = orig_expr diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch new file mode 100644 index 000000000000..d508b09164bf --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_autograd_cpp_node_saved_float.patch @@ -0,0 +1,17 @@ +Fix failure in TestCompiledAutograd.test_autograd_cpp_node_saved_float +> Expected 3 but got 4. 
+ +Taken from https://github.com/pytorch/pytorch/pull/143247 +--- a/test/inductor/test_compiled_autograd.py ++++ b/test/inductor/test_compiled_autograd.py +@@ -2424,7 +2424,9 @@ def fn(): + yield x.grad + + # compiled autograd and dynamo both support symfloat, but not backend +- self.check_output_and_recompiles(fn, [1, 3]) ++ self.check_output_and_recompiles(fn, [1, 4]) ++ # 1 restart analysis due to specialize_float=False ++ self.assertEqual(counters["stats"]["unique_graphs"], 3) + + @scoped_load_inline + def test_autograd_cpp_node_data_dependent(self, load_inline): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch new file mode 100644 index 000000000000..94591cd56888 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_embedding.patch @@ -0,0 +1,36 @@ +TestSelectAlgorithmCPU.test_linear_with_embedding fails when the CPU does not support BF16: +> torch._inductor.exc.InductorError: LoweringException: RuntimeError: self and mat2 must have the same dtype, but got Float and BFloat16 +See https://github.com/pytorch/pytorch/issues/147104 + +Convert the embedding layer to avoid it using "Float" and adapt the check for this change. + +Author: Alexander Grund (TU Dresden) +--- a/test/inductor/test_cpu_select_algorithm.py ++++ b/test/inductor/test_cpu_select_algorithm.py +@@ -932,6 +932,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + def test_linear_with_embedding( + self, batch_size, in_features, out_features, bias, dtype + ): ++ has_bf16 = torch.ops.mkldnn._is_mkldnn_bf16_supported() + class M(torch.nn.Module): + def __init__(self, bias): + super().__init__() +@@ -939,6 +940,9 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + dtype=dtype + ) + self.emb = torch.nn.Embedding(64, out_features) ++ if not has_bf16: ++ self.emb = self.emb.to(dtype=dtype) ++ + + def forward(self, idx, x): + return self.emb(idx) + self.linear(x) +@@ -949,7 +953,7 @@ class TestSelectAlgorithm(BaseTestSelectAlgorithm): + with verify(dtype) as (atol, rtol): + self.common(mod, (idx, x), atol=atol, rtol=rtol) + self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1) +- self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 1) ++ self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 1 if has_bf16 else 0) + + @inductor_config.patch({"freezing": True}) + @patches diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch new file mode 100644 index 000000000000..a6f086a36daf --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_linear_with_in_out_buffer-without-mkl.patch @@ -0,0 +1,16 @@ +This test fails when FlexiBLAS is used instead of MKL. +Adjust the expected count. 
+See https://github.com/pytorch/pytorch/pull/151548 + +Author: Alexander Grund (TU Dresden) +--- a/test/inductor/test_cpu_select_algorithm.py ++++ b/test/inductor/test_cpu_select_algorithm.py +@@ -1301,7 +1301,7 @@ def forward(self, arg152_1): + rtol=rtol, + ) + self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 2) +- self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 2) ++ self.assertEqual(counters["inductor"]["cpp_epilogue_fusion_counter"], 2 if TEST_MKL else 1) + + @inductor_config.patch({"freezing": True}) + @patches diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch new file mode 100644 index 000000000000..18f34a0402fd --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_public_bindings.patch @@ -0,0 +1,32 @@ +From bf4f8919df8ee88e356b407bb84ed818ebfb407b Mon Sep 17 00:00:00 2001 +From: Howard Huang +Date: Wed, 22 Jan 2025 10:12:15 -0800 +Subject: [PATCH] Fix test_modules_can_be_imported (#145387) + +`test_modules_can_be_imported` test is currently failing due to a few missing private modules and this PR gets it working before I start to clean up the public allow list +Pull Request resolved: https://github.com/pytorch/pytorch/pull/145387 +Approved by: https://github.com/albanD +--- + test/test_public_bindings.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/test/test_public_bindings.py b/test/test_public_bindings.py +index c4e819704f17b..61065fcd629d2 100644 +--- a/test/test_public_bindings.py ++++ b/test/test_public_bindings.py +@@ -308,6 +308,7 @@ def onerror(modname): + "torch.onnx._internal.exporter._reporting", + "torch.onnx._internal.exporter._schemas", + "torch.onnx._internal.exporter._tensors", ++ "torch.onnx._internal.exporter._torchlib.ops", + "torch.onnx._internal.exporter._verification", + "torch.onnx._internal.fx._pass", + "torch.onnx._internal.fx.analysis", +@@ -377,6 +378,7 @@ def onerror(modname): + "torch.distributed._spmd.experimental_ops", + "torch.distributed._spmd.parallel_mode", + "torch.distributed._tensor", ++ "torch.distributed._tools.sac_ilp", + "torch.distributed.algorithms._checkpoint.checkpoint_wrapper", + "torch.distributed.algorithms._optimizer_overlap", + "torch.distributed.rpc._testing.faulty_agent_backend_registry", diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch new file mode 100644 index 000000000000..345359eef555 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-test_unbacked_bindings_for_divisible_u_symint.patch @@ -0,0 +1,78 @@ +Fix from https://github.com/pytorch/pytorch/pull/145315 for test_unbacked_bindings_for_divisible_u_symint +often failing with +> Tried to register an operator (mylib::foo(Tensor a, Tensor b) -> Tensor) with the same name and overload name multiple times. + +Trivial backport +Author: Alexander Grund (TU Dresden) +--- a/test/export/test_export.py ++++ b/test/export/test_export.py +@@ -3049,43 +3049,35 @@ def forward(self, x): + @testing.expectedFailureCppSerDes # no unbacked bindings after deserialization? 
+ @testing.expectedFailureSerDerNonStrict + def test_unbacked_bindings_for_divisible_u_symint(self): +- with torch.library._scoped_library("mylib", "FRAGMENT") as lib: +- torch.library.define( +- "mylib::foo", +- "(Tensor a, Tensor b) -> (Tensor)", +- tags=torch.Tag.pt2_compliant_tag, +- lib=lib, +- ) +- +- class M(torch.nn.Module): +- def forward(self, a, b): +- return torch.ops.mylib.foo(a, b) ++ class M(torch.nn.Module): ++ def forward(self, a, b): ++ return torch.ops.mylib.foo_unbacked(a, b) + +- @torch.library.impl("mylib::foo", "cpu", lib=lib) +- def foo_impl(a, b): +- return a[b.item()] ++ @torch.library.custom_op("mylib::foo_unbacked", mutates_args={}) ++ def foo_unbacked(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: ++ return a[b.item()] + +- @torch.library.register_fake("mylib::foo", lib=lib) +- def foo_fake_impl(a, b): +- ctx = torch.library.get_ctx() +- u = ctx.new_dynamic_size(min=0, max=len(a) // 10) * 10 +- return torch.empty(u, a.shape[1], dtype=a.dtype) ++ @foo_unbacked.register_fake ++ def foo_unbacked_fake_impl(a, b): ++ ctx = torch.library.get_ctx() ++ u = ctx.new_dynamic_size(min=0, max=len(a) // 10) * 10 ++ return torch.empty(u, a.shape[1], dtype=a.dtype) + +- ep = export( +- M(), +- (torch.randn(100, 4), torch.tensor(10)), +- ) +- foo = [node for node in ep.graph.nodes if node.name == "foo"][0] +- unbacked_bindings = foo.meta["unbacked_bindings"] +- self.assertEqual(len(unbacked_bindings), 1) # check binding is {u: path} +- u = next(iter(unbacked_bindings.keys())) +- self.assertEqual( +- type(u).__name__, "Symbol" +- ) # check binding is symbol, not expr +- path = unbacked_bindings[u] +- self.assertEqual(len(path), 3) # check path is [size, 0, DivideByKey(10)] +- self.assertEqual(type(path[2]).__name__, "DivideByKey") +- self.assertEqual(path[2].divisor, 10) ++ ep = export( ++ M(), ++ (torch.randn(100, 4), torch.tensor(10)), ++ ) ++ foo = [node for node in ep.graph.nodes if node.name == "foo_unbacked"][0] ++ unbacked_bindings = foo.meta["unbacked_bindings"] ++ self.assertEqual(len(unbacked_bindings), 1) # check binding is {u: path} ++ u = next(iter(unbacked_bindings.keys())) ++ self.assertEqual( ++ type(u).__name__, "Symbol" ++ ) # check binding is symbol, not expr ++ path = unbacked_bindings[u] ++ self.assertEqual(len(path), 3) # check path is [size, 0, DivideByKey(10)] ++ self.assertEqual(type(path[2]).__name__, "DivideByKey") ++ self.assertEqual(path[2].divisor, 10) + + def test_torch_check_eq_commutativity(self): + class M1(torch.nn.Module): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch new file mode 100644 index 000000000000..bad316bd3ca6 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-vsx-vector-shift-functions.patch @@ -0,0 +1,173 @@ +The VSX vector shift operators don't handle out-of-bounds values correctly. 
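+For example (illustrative, using the clamping semantics the patch introduces below): for int32 lanes, 1 << 35 should yield 0 and -64 >> 35 should yield -1 (arithmetic sign fill), but the raw vec_sl/vec_sra instructions use only the low-order bits of the shift amount and would effectively shift by 3, giving 8 and -8 instead.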
+See https://github.com/pytorch/pytorch/issues/109777 +Taken from https://github.com/pytorch/pytorch/pull/150524 + +Author: Alexander Grund (TU Dresden) + +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h +index ae146dae4d42a5..b9e8ac36391c4d 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h +@@ -334,20 +334,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int16_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint16 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint16 shift_vec1 = reinterpret_cast(b.vec1()); +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint16 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint16 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)}; +-} +- + template <> + Vectorized inline maximum( + const Vectorized& a, +@@ -362,6 +348,8 @@ Vectorized inline minimum( + return a.minimum(b); + } + ++DEFINE_SHIFT_FUNCS(int16_t) ++ + template <> + Vectorized C10_ALWAYS_INLINE operator+(const Vectorized& a, const Vectorized& b) { + return Vectorized{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())}; +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h +index 98401381c6e822..acb7bd0a5d6690 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h +@@ -265,20 +265,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int32_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint32 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint32 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint32 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint32 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)}; +-} +- + template <> + Vectorized inline maximum( + const Vectorized& a, +@@ -293,6 +279,8 @@ Vectorized inline minimum( + return a.minimum(b); + } + ++DEFINE_SHIFT_FUNCS(int32_t) ++ + template <> + Vectorized C10_ALWAYS_INLINE operator+(const Vectorized& a, const Vectorized& b) { + return Vectorized{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())}; +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h +index f8217930fa4989..1744a7cc465ad3 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h +@@ -218,20 +218,6 @@ class Vectorized { + DEFINE_MEMBER_OP(operator^, int64_t, vec_xor) + }; + +-template <> +-Vectorized inline operator<<(const Vectorized& a, const Vectorized& b) { +- vuint64 shift_vec0 = reinterpret_cast(b.vec0()); +- vuint64 shift_vec1 = reinterpret_cast(b.vec1()) ; +- return Vectorized{vec_sl(a.vec0(), shift_vec0), vec_sl(a.vec1(), shift_vec1)}; +-} +- +-template <> +-Vectorized inline operator>>(const Vectorized& a, const Vectorized& b) { +- vuint64 shift_vec0 = reinterpret_cast(b.vec0()); 
+-  vuint64 shift_vec1 = reinterpret_cast<vuint64>(b.vec1()) ;
+-  return Vectorized<int64_t>{vec_sr(a.vec0(), shift_vec0), vec_sr(a.vec1(), shift_vec1)};
+-}
+-
+ template <>
+ Vectorized<int64_t> inline maximum(
+     const Vectorized<int64_t>& a,
+@@ -246,6 +232,8 @@ Vectorized<int64_t> inline minimum(
+   return a.minimum(b);
+ }
+
++DEFINE_SHIFT_FUNCS(int64_t)
++
+ template <>
+ Vectorized<int64_t> C10_ALWAYS_INLINE operator+(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
+   return Vectorized<int64_t>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
+diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+index 1dc742f3cbb1c2..12e3fc446a600f 100644
+--- a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+@@ -2,6 +2,7 @@
+ #include <cstdint>
+ #include <c10/macros/Macros.h>
+ #include <ATen/cpu/vec/intrinsics.h>
++#include <climits>
+
+ #if defined(__clang__)
+ typedef __vector __bool char vbool8;
+@@ -35,6 +36,11 @@ using vfloat32 = __attribute__((altivec(vector__))) float;
+ using vfloat64 = __attribute__((altivec(vector__))) double;
+ #endif
+
++inline auto make_vuint(vint8 v){ return reinterpret_cast<vuint8>(v); }
++inline auto make_vuint(vint16 v){ return reinterpret_cast<vuint16>(v); }
++inline auto make_vuint(vint32 v){ return reinterpret_cast<vuint32>(v); }
++inline auto make_vuint(vint64 v){ return reinterpret_cast<vuint64>(v); }
++
+ #if !defined(vec_float)
+ C10_ALWAYS_INLINE vfloat32 vec_float(const vint32& vec_in) {
+   vfloat32 vec_out;
+@@ -469,6 +475,40 @@ const vfloat64 vd_imag_half = vfloat64{0.0, 0.5};
+ const vfloat64 vd_sqrt2_2 = vfloat64{0.70710678118654757, 0.70710678118654757};
+ const vfloat64 vd_pi_2 = vfloat64{M_PI / 2.0, 0.0};
+
++template <typename T>
++Vectorized<T> VsxShiftRightArith(const Vectorized<T>& a, const Vectorized<T>& b) {
++  const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT - std::is_signed_v<T>);
++  const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
++  const auto shift = Vectorized<T>::blendv(b, max_shift, mask);
++  return Vectorized<T>{
++      vec_sra(a.vec0(), make_vuint(shift.vec0())),
++      vec_sra(a.vec1(), make_vuint(shift.vec1()))};
++}
++
++template <typename T>
++Vectorized<T> VsxShiftLeftArith(const Vectorized<T>& a, const Vectorized<T>& b) {
++  const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT);
++  const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
++  Vectorized<T> ret(
++      vec_sl(a.vec0(), make_vuint(b.vec0())),
++      vec_sl(a.vec1(), make_vuint(b.vec1())));
++  return Vectorized<T>::blendv(ret, Vectorized<T>(0), mask);
++}
++
++#define DEFINE_SHIFT_FUNCS(operand_type)                   \
++  template <>                                              \
++  Vectorized<operand_type> C10_ALWAYS_INLINE operator>>(   \
++      const Vectorized<operand_type>& a,                   \
++      const Vectorized<operand_type>& b) {                 \
++    return VsxShiftRightArith(a, b);                       \
++  }                                                        \
++  template <>                                              \
++  Vectorized<operand_type> C10_ALWAYS_INLINE operator<<(   \
++      const Vectorized<operand_type>& a,                   \
++      const Vectorized<operand_type>& b) {                 \
++    return VsxShiftLeftArith(a, b);                        \
++  }                                                        \
++
+ } // namespace
+ } // namespace vec
+ } // namespace at
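The clamping above is the actual fix: previously, shift amounts that were negative or at least the bit width went straight into vec_sl/vec_sr, which is undefined. A minimal sketch of the semantics the patched VSX path is expected to match (the scalar reference behaviour; results assume a PyTorch build with this patch applied):

```python
import torch

# int16 has 16 bits, so shift amounts >= 16 or negative are out of range.
x = torch.tensor([16, -16], dtype=torch.int16)
print(x << torch.tensor(20, dtype=torch.int16))  # tensor([0, 0]):  left shift out of range -> 0
print(x >> torch.tensor(20, dtype=torch.int16))  # tensor([0, -1]): right shift clamps to 15, sign-fills
print(x << torch.tensor(-1, dtype=torch.int16))  # tensor([0, 0]):  negative shift -> 0
```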
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch
new file mode 100644
index 000000000000..c8f7490acb2a
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_fix-xnnpack-float16-convert.patch
@@ -0,0 +1,36 @@
+Backport of https://github.com/google/XNNPACK/commit/5f23827e66cca435fa400b6e221892ac95af0079 to fix
+> unary-elementwise.cc:125:14: error: invalid 'static_cast' from type 'xnn_bfloat16' to type '_Float16'
+
+Author: Alexander Grund (TU Dresden)
+diff -ur a/third_party/XNNPACK/src/reference/unary-elementwise.cc b/third_party/XNNPACK/src/reference/unary-elementwise.cc
+--- a/third_party/XNNPACK/src/reference/unary-elementwise.cc	2025-04-02 11:29:03.536599185 +0200
++++ b/third_party/XNNPACK/src/reference/unary-elementwise.cc	2025-04-02 11:31:46.182485083 +0200
+@@ -127,6 +127,16 @@
+   }
+ };
+
++#ifdef XNN_HAVE_FLOAT16
++template <>
++struct ConvertOp<xnn_bfloat16, _Float16> {
++  explicit ConvertOp(const xnn_unary_uparams*) {}
++  _Float16 operator()(xnn_bfloat16 x) const {
++    return static_cast<_Float16>(static_cast<float>(x));
++  }
++};
++#endif
++
+ template <typename TIn, typename TOut>
+ const xnn_unary_elementwise_config* get_convert_config(
+     std::true_type /*input_quantized*/, std::true_type /*output_quantized*/) {
+diff -ur a/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h b/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h
+--- a/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h	2025-04-02 11:29:03.497600412 +0200
++++ b/third_party/XNNPACK/src/xnnpack/simd/s16-neon.h	2025-04-02 11:30:22.054130343 +0200
+@@ -70,7 +70,7 @@
+     v_low = vget_high_s16(v);
+   }
+   if (num_elements & 2) {
+-    vst1_lane_s32((void*) output, vreinterpret_s32_s16(v_low), 0);
++    vst1_lane_s32((int32_t*) output, vreinterpret_s32_s16(v_low), 0);
+     output += 2;
+     v_low = vext_s16(v_low, v_low, 2);
+   }
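The backported specialization routes the cast through float because GCC rejects a direct cast from the xnn_bfloat16 wrapper to _Float16; the detour is lossless in its first step, since every bfloat16 value is exactly representable in float32. The same equivalence can be checked at the Python level (illustration only, not part of the patch):

```python
import torch

x = torch.tensor([0.1, 1.5, -2.0], dtype=torch.bfloat16)
# bfloat16 -> float32 is exact, so converting via float32 matches a
# direct bfloat16 -> float16 conversion.
assert torch.equal(x.to(torch.float16), x.to(torch.float32).to(torch.float16))
```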
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch
new file mode 100644
index 000000000000..aa3113072052
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_aotdispatch-matmul.patch
@@ -0,0 +1,36 @@
+test_aot_autograd_exhaustive_matmul_cpu_float32 and test_aot_autograd_exhaustive___rmatmul___cpu_float32
+fail when using OpenBLAS instead of MKL:
+
+> Mismatched elements: 1 / 10 (10.0%)
+> Greatest absolute difference: 5.91278076171875e-05 at index (7,) (up to 1e-05 allowed)
+> Greatest relative difference: 3.468156592134619e-06 at index (7,) (up to 1.3e-06 allowed)
+
+Relax the tolerances to allow them to pass.
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py
+index 6213f8f0817..b7748ad8707 100644
+--- a/test/functorch/test_aotdispatch.py
++++ b/test/functorch/test_aotdispatch.py
+@@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
+     skipIfRocm,
+     skipIfTorchDynamo,
+     TestCase,
++    TEST_MKL,
+     xfail_inherited_tests,
+     xfailIfS390X,
+     xfailIfTorchDynamo,
+@@ -6434,6 +6435,12 @@ aot_autograd_failures = {
+     decorate("nn.functional.conv2d", decorator=unittest.skipIf(IS_ARM64, "flaky")),
+ }
+
++if not TEST_MKL:
++    aot_autograd_failures.update({
++        decorate("matmul", decorator=toleranceOverride({torch.float32: tol(atol=6e-05, rtol=4e-06)})),
++        decorate("__rmatmul__", decorator=toleranceOverride({torch.float32: tol(atol=6e-05, rtol=4e-06)})),
++    })
++
+ symbolic_aot_autograd_failures = {
+     xfail("combinations", ""),  # aten.masked_select.default
+     xfail(
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch
new file mode 100644
index 000000000000..e1018a5ce66f
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_quick-baddbmm.patch
@@ -0,0 +1,23 @@
+Avoid a test failure in test_quick_baddbmm_cpu_complex64 in test_decomp.py
+on e.g. AMD EPYC (znver2) or Intel Sapphire Rapids:
+
+> AssertionError: Tensor-likes are not close!
+> Greatest absolute difference: 3.43852152582258e-05 at index (1, 2, 1) (up to 1e-05 allowed)
+> Greatest relative difference: 3.6034286949870875e-06 at index (1, 2, 1) (up to 1.3e-06 allowed)
+
+The failure doesn't happen with e.g. `-march=znver1` and is small enough to ignore.
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/torch/testing/_internal/common_methods_invocations.py
++++ b/torch/testing/_internal/common_methods_invocations.py
+@@ -12259,6 +12259,9 @@ op_db: List[OpInfo] = [
+         DecorateInfo(
+             toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
+             'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
++        DecorateInfo(
++            toleranceOverride({torch.complex64: tol(atol=4e-05, rtol=4e-06)}),
++            'TestDecomp', 'test_quick'),
+         DecorateInfo(
+             toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
+             'TestMathBits', 'test_conj_view', device_type='cuda'),
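Both tolerance patches above (and the next one) follow the same pattern: the observed deviation (roughly 3e-05 to 6e-05 absolute) slightly exceeds the default float32/complex64 tolerances of atol=1e-05 and rtol=1.3e-06, so a relaxed per-test override is registered. The effect, illustrated with the public torch.testing API:

```python
import torch

a = torch.tensor([1.0, 2.5])
b = a + torch.tensor([0.0, 5.9e-05])  # deviation like the one quoted above

try:
    torch.testing.assert_close(a, b)  # default float32: atol=1e-05, rtol=1.3e-06
except AssertionError as err:
    print(err)                        # "Tensor-likes are not close!"

# Passes with the relaxed tolerances the patch installs:
torch.testing.assert_close(a, b, atol=6e-05, rtol=4e-06)
```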
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch
new file mode 100644
index 000000000000..0529e9d78ad2
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_increase-tolerance-test_vmap_autograd_grad.patch
@@ -0,0 +1,25 @@
+TestOperatorsCPU.test_vmap_autograd_grad_nn_functional_conv2d_cpu_float32 fails with:
+> AssertionError: Tensor-likes are not close!
+>
+> Mismatched elements: 2 / 144 (1.4%)
+> Greatest absolute difference: 1.1444091796875e-05 at index (0, 4, 0, 0, 2) (up to 1e-05 allowed)
+> Greatest relative difference: 2.064850013994146e-05 at index (0, 4, 0, 0, 2) (up to 1.3e-06 allowed)
+>
+> The failure occurred for item [1]
+See https://github.com/pytorch/pytorch/issues/151113
+Increase the tolerance to match the value already used for CUDA.
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py
+index a4269ff84d5..f46189909e5 100644
+--- a/test/functorch/test_ops.py
++++ b/test/functorch/test_ops.py
+@@ -2414,7 +2414,6 @@ class TestOperators(TestCase):
+         tol1(
+             "nn.functional.conv2d",
+             {torch.float32: tol(atol=5e-05, rtol=5e-05)},
+-            device_type="cuda",
+         ),
+         tol1("svd_lowrank", {torch.float32: tol(atol=5e-05, rtol=5e-05)}),
+         tol1("pca_lowrank", {torch.float32: tol(atol=5e-05, rtol=5e-05)}),
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch
new file mode 100644
index 000000000000..acbfbbedf26b
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_remove-test_slice_with_floordiv.patch
@@ -0,0 +1,51 @@
+Remove TestExport.test_slice_with_floordiv, which does not trigger the required runtime assertion
+
+Ported from https://github.com/pytorch/pytorch/pull/145292
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/test/export/test_export.py
++++ b/test/export/test_export.py
+@@ -9947,42 +9947,6 @@ def forward(self, x, y):
+             ep.graph_module.code
+         )
+
+-    @testing.expectedFailureCppSerDes
+-    @testing.expectedFailureLegacyExportNonStrict
+-    @testing.expectedFailureLegacyExportStrict
+-    def test_slice_with_floordiv(self):
+-        # slice operation emits runtime assert s0//2 <= s1
+-        class M1(torch.nn.Module):
+-            def forward(self, x, y):
+-                d = x.size(0) // 2
+-                return y[d:]
+-
+-        class M(torch.nn.Module):
+-            def __init__(self) -> None:
+-                super().__init__()
+-                self.m1 = M1()
+-
+-            def forward(self, x, y):
+-                d = x.size(0) // 2
+-                m1_res = self.m1(x, y)
+-                return y[d:] + m1_res
+-
+-        inputs = (torch.ones(10), torch.ones(10))
+-        d0 = torch.export.Dim("d0", max=2048)
+-        d1 = torch.export.Dim("d1", max=2048)
+-        ep = export(
+-            M(),
+-            inputs,
+-            dynamic_shapes=((d0,), (d1,)),
+-        )
+-        ep.module()(torch.ones(8), torch.ones(4))
+-        ep.module()(torch.ones(8), torch.ones(5))
+-        with self.assertRaisesRegex(
+-            RuntimeError,
+-            r"Runtime assertion failed for expression \(s0//2\) \<\= s1",
+-        ):
+-            ep.module()(torch.ones(10), torch.ones(4))
+-
+     def test_split_const_gm_with_lifted_constants(self):
+         class Model(torch.nn.Module):
+             def __init__(self) -> None:
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch
new file mode 100644
index 000000000000..b8f6222dcc42
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-diff-test-on-ppc.patch
@@ -0,0 +1,26 @@
+The workaround for over/underflow isn't implemented for PPC yet.
+So skip the test.
+See https://github.com/pytorch/pytorch/issues/109870
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/test_binary_ufuncs.py b/test/test_binary_ufuncs.py
+index ee9fb490356..ba18b28adeb 100644
+--- a/test/test_binary_ufuncs.py
++++ b/test/test_binary_ufuncs.py
+@@ -66,6 +66,7 @@ from torch.testing._internal.common_utils import (
+     TestCase,
+     torch_to_numpy_dtype_dict,
+     xfailIfTorchDynamo,
++    IS_PPC,
+ )
+
+
+@@ -1115,6 +1116,7 @@ class TestBinaryUfuncs(TestCase):
+     )
+
+     @dtypes(*complex_types())
++    @skipIf(IS_PPC, "Vectorized div fails on PPC: #109870")
+     def test_complex_div_underflow_overflow(self, device, dtype):
+         # test to make sure the complex division does not produce underflow or overflow
+         # in the intermediate of its calculations
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch
new file mode 100644
index 000000000000..a5369ff3398a
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test-requiring-MKL.patch
@@ -0,0 +1,14 @@
+The test checks for a fusion that relies on MKL being available, so it fails without MKL:
+> AssertionError: 'mkl._mkl_linear' not found in ...
+
+Author: Alexander Grund (TU Dresden)
+--- a/test/inductor/test_mkldnn_pattern_matcher.py
++++ b/test/inductor/test_mkldnn_pattern_matcher.py
+@@ -3157,6 +3157,7 @@ class TestPatternMatcher(TestPatternMatcherBase):
+         om(*example_inputs)
+         om(*example_inputs)
+
++    @unittest.skipIf(not TEST_MKL, "Test requires MKL")
+     @xfailIfACL
+     @torch._dynamo.config.patch("inline_inbuilt_nn_modules", True)
+     def test_reproduce_121253_issue_addmm_fusion_check(self):
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch
new file mode 100644
index 000000000000..2d277a433896
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_checkpoint_wrapper_parity-on-cpu.patch
@@ -0,0 +1,24 @@
+When no GPUs are available, test_checkpoint_wrapper_parity fails with
+> AttributeError: module 'torch.cpu' has no attribute 'reset_peak_memory_stats'
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/distributed/fsdp/test_checkpoint_wrapper.py b/test/distributed/fsdp/test_checkpoint_wrapper.py
+index 0f873b49297..afda0c13a6c 100644
+--- a/test/distributed/fsdp/test_checkpoint_wrapper.py
++++ b/test/distributed/fsdp/test_checkpoint_wrapper.py
+@@ -1,6 +1,7 @@
+ # Owner(s): ["oncall: distributed"]
+
+ import contextlib
++import unittest
+ from copy import deepcopy
+ from functools import partial
+
+@@ -132,6 +133,7 @@ class CheckpointWrapperTest(TestCase):
+         m(torch.randn(2, 1)).sum().backward()
+         self.assertEqual(2, count)
+
++    @unittest.skipIf(device_type.type == "cpu", "CPU does not support max_memory_allocated")
+     def test_checkpoint_wrapper_parity(self):
+         """
+         Tests that using checkpoint_wrapper or the functional
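For context on the failure being skipped above: peak-memory bookkeeping only exists for accelerator backends, so the parity test's reset call has no CPU counterpart. A quick check, assuming a CPU-only PyTorch 2.6 build with no CUDA device present:

```python
import torch

if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()  # fine on GPU builds
else:
    # This is what the unpatched test trips over:
    print(hasattr(torch.cpu, "reset_peak_memory_stats"))  # False
```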
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch
new file mode 100644
index 000000000000..3e11a13d56da
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_init_from_local_shards.patch
@@ -0,0 +1,24 @@
+The test often times out and seems to be considered flaky by PyTorch:
+https://github.com/pytorch/pytorch/issues/78068
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+index 730b2c2c0ac..5f9b9545700 100644
+--- a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
++++ b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+@@ -7,6 +7,7 @@ import math
+ import pickle
+ import sys
+ from typing import List
++from unittest import skip
+
+ import torch
+ import torch.distributed as dist
+@@ -2426,6 +2427,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
+     @with_comms
+     @skip_if_lt_x_gpu(4)
+     @requires_nccl()
++    @skip("Times out often")
+     def test_init_from_local_shards(self):
+         local_shard_metadata = ShardMetadata(
+             shard_offsets=[(self.rank // 2) * 5, (self.rank % 2) * 5],
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch
new file mode 100644
index 000000000000..f553f0d8b201
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_jvp_linalg_det_singular.patch
@@ -0,0 +1,48 @@
+test_jvp_linalg_det_singular_cpu_float32 in functorch/test_ops.py fails consistently with the error
+> Mismatched elements: 1 / 2 (50.0%)
+> Greatest absolute difference: 0.024928677827119827 at index (1,) (up to 1e-05 allowed)
+> Greatest relative difference: 1.0 at index (1,) (up to 1.3e-06 allowed)
+
+This is a known issue on x86 macOS, see https://github.com/pytorch/pytorch/issues/110980
+So just skip this test.
+
+The same applies to test_vmapjvpall_linalg_det_singular_cpu_float32 and test_forward_mode_AD_linalg_det_singular_cpu_float64,
+see https://github.com/pytorch/pytorch/issues/111583
+
+
+
+All det_singular tests are removed in 2.7: https://github.com/pytorch/pytorch/commit/3a3e2cf90a03fcf332a275f331fcb57e46d6c325
+
+Author: Alexander Grund (TU Dresden)
+diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py
+index a4269ff84d5..c0fad294489 100644
+--- a/test/functorch/test_ops.py
++++ b/test/functorch/test_ops.py
+@@ -589,7 +589,7 @@ class TestOperators(TestCase):
+             decorate(
+                 "linalg.det",
+                 "singular",
+-                decorator=expectedFailureIf(IS_MACOS and IS_X86),
++                decorator=unittest.skipIf(IS_X86, 'Known failure: #110980'),
+             ),
+         }
+     ),
+@@ -1350,7 +1350,7 @@
+             decorate(
+                 "linalg.det",
+                 "singular",
+-                decorator=expectedFailureIf(IS_MACOS and IS_X86),
++                decorator=expectedFailureIf(IS_X86),
+             ),
+         }
+     ),
+--- a/torch/testing/_internal/opinfo/definitions/linalg.py
++++ b/torch/testing/_internal/opinfo/definitions/linalg.py
+@@ -1233,7 +1233,6 @@ op_db: List[OpInfo] = [
+                 "test_forward_mode_AD",
+                 device_type="cpu",
+                 dtypes=(torch.float64,),
+-                active_if=IS_MACOS,
+             ),
+             # Both Hessians are incorrect on complex inputs??
+             DecorateInfo(
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch
new file mode 100644
index 000000000000..9a585ff53a77
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-test_segfault.patch
@@ -0,0 +1,16 @@
+The test is xfailed because it fails in some upstream CI configs, but it succeeds here, which is itself reported as a failure.
+Just disable it.
+
+Author: Alexander Grund (TU Dresden)
+
+--- a/test/test_dataloader.py
++++ b/test/test_dataloader.py
+@@ -1388,7 +1388,7 @@ except RuntimeError as e:
+     # please don't forget to remove this skip when remove the xfailIfLinux.
+ @skipIfXpu + # https://github.com/pytorch/pytorch/issues/128551 +- @xfailIfLinux ++ @unittest.skip("unexpected success on e.g. AMD CPU") + def test_segfault(self): + p = ErrorTrackingProcess(target=_test_segfault) + p.start() diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch new file mode 100644 index 000000000000..3d985af079b9 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.6.0_skip-tests-without-fbgemm.patch @@ -0,0 +1,50 @@ +Those tests (from test_ao_sparsity & test_quantization) require FBGEMM which may not be available. +So add the skip decorator. +See https://github.com/pytorch/pytorch/issues/87364 + +Author: Alexander Grund (TU Dresden) + +diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py +index 1156a7ecb8a..4239196c293 100644 +--- a/test/ao/sparsity/test_composability.py ++++ b/test/ao/sparsity/test_composability.py +@@ -14,6 +14,7 @@ from torch.ao.quantization.quantize_fx import ( + prepare_fx, + prepare_qat_fx, + ) ++from torch.testing._internal.common_quantization import skipIfNoFBGEMM + from torch.testing._internal.common_utils import TestCase, xfailIfS390X + + +@@ -71,6 +71,7 @@ def _calculate_sparsity(tensor): + # This series of tests are to check the composability goals for sparsity and quantization. Namely + # that performing quantization and sparsity model manipulations in various orderings + # does not cause problems ++@skipIfNoFBGEMM + class TestComposability(TestCase): + # This test checks whether performing quantization prepare before sparse prepare + # causes any issues and verifies that the correct observers are inserted and that +@@ -346,6 +347,7 @@ class TestFxComposability(TestCase): + """ + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_q_prep_fx_before_s_prep(self): + r""" + This test checks that the ordering of prepare_fx -> sparse prepare -> convert_fx +@@ -480,6 +482,7 @@ class TestFxComposability(TestCase): + self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"]) + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_s_prep_before_q_prep_fx(self): + r""" + This test checks that the ordering of sparse prepare -> prepare_fx -> convert_fx +@@ -532,6 +535,7 @@ class TestFxComposability(TestCase): + self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"]) + + @xfailIfS390X ++ @skipIfNoFBGEMM + def test_s_prep_before_qat_prep_fx(self): + r""" + This test checks that the ordering of sparse prepare -> prepare_qat_fx -> convert_fx diff --git a/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb new file mode 100644 index 000000000000..8a3c7154825a --- /dev/null +++ b/easybuild/easyconfigs/p/pytest-subtests/pytest-subtests-0.13.1-GCCcore-13.2.0.eb @@ -0,0 +1,22 @@ +easyblock = 'PythonPackage' + +name = 'pytest-subtests' +version = '0.13.1' + +homepage = 'https://github.com/pytest-dev/pytest-subtests' +description = "unittest subTest() support and subtests fixture." + +toolchain = {'name': 'GCCcore', 'version': '13.2.0'} + +builddependencies = [ + ('binutils', '2.40'), +] +dependencies = [ + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), +] + +sources = [f'{name.replace("-", "_")}-%(version)s.tar.gz'] +checksums = ['989e38f0f1c01bc7c6b2e04db7d9fd859db35d77c2c1a430c831a70cbf3fde2d'] + +moduleclass = 'tools'
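For reference, the subtests fixture this package provides reports an independent pass/fail result per iteration of a loop inside a single test; a minimal usage sketch (hypothetical test file, not part of the easyconfig):

```python
# test_parity.py -- run with: pytest test_parity.py
def test_parity(subtests):
    for i in range(4):
        with subtests.test(msg="parity", i=i):
            assert i % 2 == 0  # each i is reported as its own sub-test
```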