From bb3da2103ffa06cdbb4bc55e331436f8e482b090 Mon Sep 17 00:00:00 2001 From: sassy Date: Tue, 11 Oct 2022 10:51:55 +0100 Subject: [PATCH 1/6] adding easyconfigs: PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb --- .../PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb new file mode 100644 index 000000000000..253cc53a0235 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -0,0 +1,126 @@ +name = 'PyTorch' +version = '1.11.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2021b'} + +sources = [{ + 'filename': '%(name)s-%(version)s.tar.gz', + 'git_config': { + 'url': 'https://github.com/pytorch', + 'repo_name': 'pytorch', + 'tag': 'v%(version)s', + 'recursive': True, + }, +}] +patches = [ + 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch', + 'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch', + 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_skip_cmake_rpath.patch', + 'PyTorch-1.11.0_increase-distributed-test-timeout.patch', + 'PyTorch-1.11.0_skip_failing_ops_tests.patch', + 'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch', + 'PyTorch-1.11.0_fix_sharded_imports.patch', + 'PyTorch-1.11.0_increase_test_tolerances_TF32.patch', + 'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch', + 'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch', +] +checksums = [ + None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' + 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch + '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch + '89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch + # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch + 'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea', + # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch + '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch + # PyTorch-1.11.0_increase-distributed-test-timeout.patch + '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f', + '8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch + '21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch + '9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch + # PyTorch-1.11.0_increase_test_tolerances_TF32.patch + '26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba', + # PyTorch-1.11.0_increase_c10d_gloo_timeout.patch + '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953', + # PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch + 'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51', +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.22.1'), + ('hypothesis', '6.14.6'), +] + +dependencies = [ + ('CUDA', '11.4.1', '', True), + ('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions + ('Python', '3.9.6'), + ('protobuf', '3.17.3'), + ('protobuf-python', '3.17.3'), + ('pybind11', '2.7.1'), + ('SciPy-bundle', '2021.10'), + ('typing-extensions', '3.10.0.2'), + ('PyYAML', '5.4.1'), + ('MPFR', '4.1.0'), + ('GMP', '6.2.1'), + ('numactl', '2.0.14'), + ('FFmpeg', '4.3.2'), + ('Pillow', '8.3.2'), + ('cuDNN', '8.2.2.26', '-CUDA-%(cudaver)s', True), + ('magma', '2.6.2', '-CUDA-%(cudaver)s'), + ('NCCL', '2.10.3', '-CUDA-%(cudaver)s'), + ('expecttest', '0.1.3'), +] + +# default CUDA compute capabilities to use (override via --cuda-compute-capabilities) +cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6'] + +custom_opts = ["USE_CUPTI_SO=1"] + +excluded_tests = { + '': [ + # Bad tests: https://github.com/pytorch/pytorch/issues/60260 + 'distributed/elastic/utils/distributed_test', + 'distributed/elastic/multiprocessing/api_test', + # These tests fail on A10s at the very least, they time out forever no matter how long the timeout is. + # Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html + # 'distributed/test_distributed_fork', + 'distributed/test_distributed_spawn', + # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079 + 'test_optim', + # Test from this suite timeout often. The process group backend is deprecated anyway + # 'distributed/rpc/test_process_group_agent', + # This test fails constently when run as part of the test suite, but succeeds when run interactively + 'test_model_dump', + # These tests appear flaky, possibly related to number of GPUs that are used + 'distributed/fsdp/test_fsdp_memory', + 'distributed/fsdp/test_fsdp_overlap', + ] +} + +runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s' + +# several tests are known to be flaky, and fail in some contexts (like having multiple GPUs available), +# so we allow up to 10 (out of ~90k) tests to fail before treating the installation to be faulty +max_failed_tests = 10 + +# The readelf sanity check command can be taken out once the TestRPATH test from +# https://github.com/pytorch/pytorch/pull/68912 is accepted, since it is then checked as part of the PyTorch test suite +local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT +sanity_check_commands = [ + "readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2, +] +tests = ['PyTorch-check-cpp-extension.py'] + +moduleclass = 'devel' From 5e94ee5bdae5cfc0a459034fe59f50179b5c87ee Mon Sep 17 00:00:00 2001 From: sassy Date: Thu, 13 Oct 2022 16:47:23 +0100 Subject: [PATCH 2/6] Moduleclass changed to AI, comment for RTX 6000/Skylake added --- .../p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb index 253cc53a0235..573e2f1df8ac 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -113,6 +113,7 @@ runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-throu # several tests are known to be flaky, and fail in some contexts (like having multiple GPUs available), # so we allow up to 10 (out of ~90k) tests to fail before treating the installation to be faulty +# For the RTX 6000 on Skylake, that number is up to 24 max_failed_tests = 10 # The readelf sanity check command can be taken out once the TestRPATH test from @@ -123,4 +124,4 @@ sanity_check_commands = [ ] tests = ['PyTorch-check-cpp-extension.py'] -moduleclass = 'devel' +moduleclass = 'ai' From cdb3bbe7aaaf67982568d45449aba2b4b3dc2454 Mon Sep 17 00:00:00 2001 From: sassy Date: Thu, 13 Oct 2022 17:14:08 +0100 Subject: [PATCH 3/6] True changed to SYSTEM, comment for RTX 6000/Skylake modified --- .../p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb index 573e2f1df8ac..9be556b290cb 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -63,7 +63,7 @@ builddependencies = [ ] dependencies = [ - ('CUDA', '11.4.1', '', True), + ('CUDA', '11.4.1', '', SYSTEM), ('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions ('Python', '3.9.6'), ('protobuf', '3.17.3'), @@ -77,7 +77,7 @@ dependencies = [ ('numactl', '2.0.14'), ('FFmpeg', '4.3.2'), ('Pillow', '8.3.2'), - ('cuDNN', '8.2.2.26', '-CUDA-%(cudaver)s', True), + ('cuDNN', '8.2.2.26', '-CUDA-%(cudaver)s', SYSTEM), ('magma', '2.6.2', '-CUDA-%(cudaver)s'), ('NCCL', '2.10.3', '-CUDA-%(cudaver)s'), ('expecttest', '0.1.3'), @@ -113,7 +113,7 @@ runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-throu # several tests are known to be flaky, and fail in some contexts (like having multiple GPUs available), # so we allow up to 10 (out of ~90k) tests to fail before treating the installation to be faulty -# For the RTX 6000 on Skylake, that number is up to 24 +# For the RTX 6000 on Skylake, that number might be up to 24 max_failed_tests = 10 # The readelf sanity check command can be taken out once the TestRPATH test from From 712f2fe677e8e33e06846cfd246a7e13d5025646 Mon Sep 17 00:00:00 2001 From: sassy Date: Tue, 18 Oct 2022 09:27:30 +0100 Subject: [PATCH 4/6] Suggested patches from #16339 --- .../PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb index 9be556b290cb..ceb262449b2a 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -17,42 +17,73 @@ sources = [{ 'recursive': True, }, }] + patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', 'PyTorch-1.7.0_disable-dev-shm-test.patch', 'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch', - 'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch', + 'PyTorch-1.10.0_fix-kineto-crash.patch', 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_fix-test-model_dump.patch', + 'PyTorch-1.10.0_fix-vsx-vector-functions.patch', + 'PyTorch-1.10.0_fix-XNNPACK-tests.patch', 'PyTorch-1.10.0_skip_cmake_rpath.patch', - 'PyTorch-1.11.0_increase-distributed-test-timeout.patch', - 'PyTorch-1.11.0_skip_failing_ops_tests.patch', - 'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch', + 'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch', + 'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch', + 'PyTorch-1.11.0_fix-attention_cpp-compilation.patch', + 'PyTorch-1.11.0_fix-fsdp-fp16-test.patch', 'PyTorch-1.11.0_fix_sharded_imports.patch', - 'PyTorch-1.11.0_increase_test_tolerances_TF32.patch', + 'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch', + 'PyTorch-1.11.0_fix-test_utils.patch', 'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch', - 'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch', + 'PyTorch-1.11.0_increase-distributed-test-timeout.patch', + 'PyTorch-1.11.0_increase_test_tolerances_TF32.patch', + 'PyTorch-1.11.0_increase-tolerance-test_ops.patch', + 'PyTorch-1.11.0_install-vsx-vec-headers.patch', + 'PyTorch-1.11.0_skip_failing_ops_tests.patch', + 'PyTorch-1.11.1_skip-test_init_from_local_shards.patch', + 'PyTorch-1.11.1_skip-test_sibling_fusion.patch', ] checksums = [ None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch '89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch - # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch - 'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea', + # PyTorch-1.10.0_fix-kineto-crash.patch + 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb', # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_fix-test-model_dump.patch + '339148ae1a028cda6e750ac93fa38a599f66c7abe26586c9219f1a206ea14557', + # PyTorch-1.10.0_fix-vsx-vector-functions.patch + '7bef5f96cb83b2d655d2f76dd7468a171d446f0b3e06da2232ec7f886484d312', + # PyTorch-1.10.0_fix-XNNPACK-tests.patch + 'd3e749a2a42efce463e3b8a1aebb21f0edf2256682c4417297d9a44a6210e5f8', 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch + # PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch + '34ba476a7bcddec323bf9eca083cb4623d0f569d081aa3add3769c24f22849d2', + # PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch + 'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51', + # PyTorch-1.11.0_fix-attention_cpp-compilation.patch + '84214fcc7e30cf70659a7c3bd70bf11e73d58fd4f7fff2c233e3225619b0e42c', + 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13', # PyTorch-1.11.0_fix-fsdp-fp16-test.patch + '9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch + '21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch + '4f7e25c4e2eb7094f92607df74488c6a4a35849fabf05fcf6c3655fa3f44a861', # PyTorch-1.11.0_fix-test_utils.patch + # PyTorch-1.11.0_increase_c10d_gloo_timeout.patch + '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953', # PyTorch-1.11.0_increase-distributed-test-timeout.patch '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f', - '8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch - '21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch - '9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch # PyTorch-1.11.0_increase_test_tolerances_TF32.patch '26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba', - # PyTorch-1.11.0_increase_c10d_gloo_timeout.patch - '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953', - # PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch - 'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51', + # PyTorch-1.11.0_increase-tolerance-test_ops.patch + 'ceec745c68a405bba79efb4dc61c662ca84eb950cd0163c7104330f4bf614cf5', + 'f2e6b9625733d9a471bb75e1ea20e28814cf1380b4f9089aa838ee35ddecf07d', # PyTorch-1.11.0_install-vsx-vec-headers.patch + '8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch + # PyTorch-1.11.1_skip-test_init_from_local_shards.patch + '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7', + # PyTorch-1.11.1_skip-test_sibling_fusion.patch + '3d6f6395d98e8e4ad76b0b63c625fddf082cf7f066eb97d4d82401f96dab2555', ] osdependencies = [OS_PKG_IBVERBS_DEV] @@ -96,13 +127,13 @@ excluded_tests = { # These tests fail on A10s at the very least, they time out forever no matter how long the timeout is. # Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html # 'distributed/test_distributed_fork', - 'distributed/test_distributed_spawn', + # 'distributed/test_distributed_spawn', # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079 'test_optim', # Test from this suite timeout often. The process group backend is deprecated anyway # 'distributed/rpc/test_process_group_agent', # This test fails constently when run as part of the test suite, but succeeds when run interactively - 'test_model_dump', + # 'test_model_dump', # These tests appear flaky, possibly related to number of GPUs that are used 'distributed/fsdp/test_fsdp_memory', 'distributed/fsdp/test_fsdp_overlap', From 51348bb329c59f1cbe6f9c304b3e2a92ce15fae8 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Fri, 25 Nov 2022 15:19:12 +0100 Subject: [PATCH 5/6] don't use git_config to donwload source tarball for PyTorch 1.11.0, download provided full source tarball from GitHub instead --- .../PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb index ceb262449b2a..8a62094e6afc 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -8,15 +8,8 @@ PyTorch is a deep learning framework that puts Python first.""" toolchain = {'name': 'foss', 'version': '2021b'} -sources = [{ - 'filename': '%(name)s-%(version)s.tar.gz', - 'git_config': { - 'url': 'https://github.com/pytorch', - 'repo_name': 'pytorch', - 'tag': 'v%(version)s', - 'recursive': True, - }, -}] +source_urls = ['https://github.com/%(github_account)s/%(namelower)s/releases/download/v%(version)s'] +sources = ['%(namelower)s-v%(version)s.tar.gz'] patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', @@ -45,7 +38,7 @@ patches = [ 'PyTorch-1.11.1_skip-test_sibling_fusion.patch', ] checksums = [ - None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' + 'dc0c2b8d13c112a2b9ea8757a475b0ce2ca97cd19c50a8b70b8c286676616f1d', # pytorch-v1.11.0.tar.gz 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch '89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch From 8b58193ef71c5529e0bb3d69bd3e9b5772430d82 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 14 Jan 2023 20:16:30 +0100 Subject: [PATCH 6/6] add patches to fix/skip failing tests for PyTorch 1.11.0 + drop typing-extensions dependency (already included with Python) --- .../p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb index 8a62094e6afc..06a9966db132 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0-foss-2021b-CUDA-11.4.1.eb @@ -10,7 +10,6 @@ toolchain = {'name': 'foss', 'version': '2021b'} source_urls = ['https://github.com/%(github_account)s/%(namelower)s/releases/download/v%(version)s'] sources = ['%(namelower)s-v%(version)s.tar.gz'] - patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', 'PyTorch-1.7.0_disable-dev-shm-test.patch', @@ -36,6 +35,9 @@ patches = [ 'PyTorch-1.11.0_skip_failing_ops_tests.patch', 'PyTorch-1.11.1_skip-test_init_from_local_shards.patch', 'PyTorch-1.11.1_skip-test_sibling_fusion.patch', + 'PyTorch-1.12.1_skip-test_round_robin.patch', + 'PyTorch-1.12.1_fix-test_wishart_log_prob.patch', + 'PyTorch-1.12.1_fix-autograd-thread_shutdown-test.patch', ] checksums = [ 'dc0c2b8d13c112a2b9ea8757a475b0ce2ca97cd19c50a8b70b8c286676616f1d', # pytorch-v1.11.0.tar.gz @@ -77,6 +79,11 @@ checksums = [ '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7', # PyTorch-1.11.1_skip-test_sibling_fusion.patch '3d6f6395d98e8e4ad76b0b63c625fddf082cf7f066eb97d4d82401f96dab2555', + '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349', # PyTorch-1.12.1_skip-test_round_robin.patch + # PyTorch-1.12.1_fix-test_wishart_log_prob.patch + 'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45', + # PyTorch-1.12.1_fix-autograd-thread_shutdown-test.patch + 'd97cd6b0570a167ecc3e631dc4ea884d95ace285cc38aa980566f4fec2c0d089', ] osdependencies = [OS_PKG_IBVERBS_DEV] @@ -94,7 +101,6 @@ dependencies = [ ('protobuf-python', '3.17.3'), ('pybind11', '2.7.1'), ('SciPy-bundle', '2021.10'), - ('typing-extensions', '3.10.0.2'), ('PyYAML', '5.4.1'), ('MPFR', '4.1.0'), ('GMP', '6.2.1'),