diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb
index b6a1e3ca5b2b..645b2fb8883d 100644
--- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb
@@ -12,7 +12,6 @@ sources = ['%(namelower)s-v%(version)s.tar.gz']
 patches = [
     'PyTorch-1.7.0_disable-dev-shm-test.patch',
     'PyTorch-1.10.0_fix-kineto-crash.patch',
-    'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
     'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
     'PyTorch-1.12.1_add-hypothesis-suppression.patch',
     'PyTorch-1.12.1_fix-skip-decorators.patch',
@@ -20,23 +19,23 @@ patches = [
     'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
     'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
     'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
-    'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
     'PyTorch-1.12.1_fix-vsx-loadu.patch',
+    'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
     'PyTorch-1.12.1_skip-test_round_robin.patch',
+    'PyTorch-1.13.1_fix-fsdp-fp16-test.patch',
     'PyTorch-1.13.1_fix-pytest-args.patch',
     'PyTorch-1.13.1_fix-test-ops-conf.patch',
-    'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
-    'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
-    'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch',
     'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
     'PyTorch-1.13.1_install-vsx-vec-headers.patch',
+    'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
+    'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
     'PyTorch-1.13.1_skip-failing-grad-test.patch',
+    'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
 ]
 checksums = [
     {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
     {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
     {'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
-    {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
     {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
      '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
     {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
@@ -49,21 +48,22 @@ checksums = [
     {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
     {'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
      '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
-    {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
     {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
+    {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
     {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
+    {'PyTorch-1.13.1_fix-fsdp-fp16-test.patch': '8ae68e60d6e1f92f50322b7f0381c7e65251fba32d7606e3a238a36a2f55b5cf'},
     {'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
     {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
-    {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
-    {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
-     'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
-    {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch':
-     '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'},
     {'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
-     'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'},
+     'c909fdfc2b12df457e1eb5514265ffec3eab653994949416f3f048668421e223'},
     {'PyTorch-1.13.1_install-vsx-vec-headers.patch':
      '7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
+    {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
+    {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
+     'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
     {'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
+    {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
+     '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
 ]
 
 osdependencies = [OS_PKG_IBVERBS_DEV]
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-fsdp-fp16-test.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-fsdp-fp16-test.patch
new file mode 100644
index 000000000000..567103a376b7
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-fsdp-fp16-test.patch
@@ -0,0 +1,21 @@
+The test fails on a node with more than 5 V100 GPUs or more than 4 A100 GPUs.
+Hence limit the world_size to 4
+See https://github.com/pytorch/pytorch/pull/86280
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/distributed/fsdp/test_fsdp_pure_fp16.py b/test/distributed/fsdp/test_fsdp_pure_fp16.py
+index 1c663f8263354..e0033ef3d4b72 100644
+--- a/test/distributed/fsdp/test_fsdp_pure_fp16.py
++++ b/test/distributed/fsdp/test_fsdp_pure_fp16.py
+@@ -34,8 +34,8 @@
+ class TestPureFP16(FSDPTest):
+     @property
+     def world_size(self):
+-        # Test fails due to inaccuracies when using more than 5 GPUs
+-        return min(5, super().world_size)
++        # Test fails due to inaccuracies when using more than 4 GPUs
++        return min(4, super().world_size)
+ 
+     @skip_if_lt_x_gpu(2)
+     @parametrize(
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch
index fb28aadbefa1..b07becd652e8 100644
--- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch
@@ -4,18 +4,24 @@
 > Greatest absolute difference: 1.430511474609375e-05 at index (4, 5) (up to 1e-05 allowed)
 > Greatest relative difference: 4.65393206065873e-06 at index (4, 5) (up to 1.3e-06 allowed)
 
+See https://github.com/pytorch/pytorch/pull/86365
+
 Author: Alexander Grund (TU Dresden)
-Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham)
 
---- a/test/test_ops.py
-+++ b/test/test_ops.py
-@@ -545,6 +545,9 @@
-             else list(supported_dtypes)[0]
-         )
- 
-+        if dtype is torch.float32:
-+            self.precision, self.rel_tol = (1.5e-05, 1e-05)
-+
-         samples = op.sample_inputs(device, dtype)
-         for sample in samples:
-             # calls it normally to get the expected result
+diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
+index 4b2d0ebabc46b..bab7843a72b74 100644
+--- a/torch/testing/_internal/common_methods_invocations.py
++++ b/torch/testing/_internal/common_methods_invocations.py
+@@ -8503,7 +8503,11 @@ op_db: List[OpInfo] = [
+                DecorateInfo(
+                    toleranceOverride({torch.float32: tol(atol=1.3e-05, rtol=1.3e-05),
+                                       torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
+-                   'TestCommon', 'test_numpy_refs')],
++                   'TestCommon', 'test_numpy_refs'),
++               DecorateInfo(
++                   toleranceOverride({torch.float32: tol(atol=1.5e-05, rtol=1e-05)}),
++                   'TestCommon', 'test_out'),
++           ],
+            skips=(
+                # NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
+                DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch
deleted file mode 100644
index 481b013de21c..000000000000
--- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-Those tests (from test_ao_sparsity) require FBGEMM which may not be available.
-So add the skip decorator.
-See https://github.com/pytorch/pytorch/issues/87364
-
-Author: Alexander Grund (TU Dresden)
-
-diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py
-index 6a1b6067a4c..b2eed72e3e3 100644
---- a/test/ao/sparsity/test_composability.py
-+++ b/test/ao/sparsity/test_composability.py
-@@ -9,6 +9,7 @@ import torch.ao.quantization as tq
- from torch import nn
- from torch.ao import sparsity
- from torch.testing._internal.common_utils import TestCase
-+from torch.testing._internal.common_quantization import skipIfNoFBGEMM
- from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx
- from torch.ao.sparsity import fqn_to_module
- 
-@@ -62,6 +63,7 @@ def _calculate_sparsity(tensor):
- # This series of tests are to check the composability goals for sparsity and quantization. Namely
- # that performing quantization and sparsity model manipulations in various orderings
- # does not cause problems
-+@skipIfNoFBGEMM
- class TestComposability(TestCase):
-     # This test checks whether performing quantization prepare before sparse prepare
-     # causes any issues and verifies that the correct observers are inserted and that
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-tests-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-tests-without-fbgemm.patch
new file mode 100644
index 000000000000..77dde5f31100
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-tests-without-fbgemm.patch
@@ -0,0 +1,68 @@
+Those tests (from test_ao_sparsity & test_quantization) require FBGEMM which may not be available.
+So add the skip decorator.
+See https://github.com/pytorch/pytorch/issues/87364
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py
+index 6a1b6067a4c..0c43f585af2 100644
+--- a/test/ao/sparsity/test_composability.py
++++ b/test/ao/sparsity/test_composability.py
+@@ -9,6 +9,7 @@ import torch.ao.quantization as tq
+ from torch import nn
+ from torch.ao import sparsity
+ from torch.testing._internal.common_utils import TestCase
++from torch.testing._internal.common_quantization import skipIfNoFBGEMM
+ from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx
+ from torch.ao.sparsity import fqn_to_module
+ 
+@@ -62,6 +63,7 @@ def _calculate_sparsity(tensor):
+ # This series of tests are to check the composability goals for sparsity and quantization. Namely
+ # that performing quantization and sparsity model manipulations in various orderings
+ # does not cause problems
++@skipIfNoFBGEMM
+ class TestComposability(TestCase):
+     # This test checks whether performing quantization prepare before sparse prepare
+     # causes any issues and verifies that the correct observers are inserted and that
+@@ -326,6 +328,7 @@ class TestFxComposability(TestCase):
+     r"""This series of tests checks that various steps of the quantization and sparsity flow
+     compose cleanly despite variation in sequencing.
+     """
++    @skipIfNoFBGEMM
+     def test_q_prep_fx_before_s_prep(self):
+         r"""
+         This test checks that the ordering of prepare_fx -> sparse prepare -> convert_fx
+@@ -445,6 +448,7 @@ class TestFxComposability(TestCase):
+         )
+         self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])
+ 
++    @skipIfNoFBGEMM
+     def test_s_prep_before_q_prep_fx(self):
+         r"""
+         This test checks that the ordering of sparse prepare -> prepare_fx -> convert_fx
+@@ -490,6 +494,7 @@ class TestFxComposability(TestCase):
+         )
+         self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])
+ 
++    @skipIfNoFBGEMM
+     def test_s_prep_before_qat_prep_fx(self):
+         r"""
+         This test checks that the ordering of sparse prepare -> prepare_qat_fx -> convert_fx
+diff --git a/test/quantization/core/test_docs.py b/test/quantization/core/test_docs.py
+index 27842b46ce7..8e50ffa3166 100644
+--- a/test/quantization/core/test_docs.py
++++ b/test/quantization/core/test_docs.py
+@@ -10,11 +10,13 @@ import torch
+ from torch.testing._internal.common_quantization import (
+     QuantizationTestCase,
+     SingleLayerLinearModel,
++    skipIfNoFBGEMM,
+ )
+ from torch.testing._internal.common_quantized import override_quantized_engine
+ from torch.testing._internal.common_utils import IS_ARM64
+ 
+ 
++@skipIfNoFBGEMM
+ class TestQuantizationDocs(QuantizationTestCase):
+     r"""
+     The tests in this section import code from the quantization docs and check that